代码实现tan graph model for classification_番外篇:GCN实现子图匹配

代码实现tan graph model for classification_番外篇:GCN实现子图匹配_第1张图片
应知友 @MeisterMorxrc 之邀,更新GNN实现论文《The Graph Neural Network Model》中的子图匹配任务,使用比较简单的GCN模型进行实现,模型框架为Pytorch。代码已更新在Github。

首先就是生成数据,数据集的格式为 dataset = [G_1, G_2, ..., G_N],

其中,G_i 为第 i 个数据,每个 G_i 的数据结构为 (nodes_list, edges_list, label_list),分别表示节点特征列表、边列表和节点标签列表。

任务说明:给定子图 sub_graph 和训练集 {G_1, ..., G_N},模型目标就是对其中的 graph 中的节点进行分类。如果 G_i 包含 sub_graph 和节点 v,且节点 v 属于 sub_graph 中的一个节点,那么节点 v 的标签为 1,反之,标签为 -1。模型就是要预测每个节点的标签是 1 还是 -1。

其它说明:这里使用的数据直接按照论文《The Graph Neural Network Model》所述的子图匹配任务进行实现,节点的特征就是数字1-10。节点集合使用节点特征的列表表示,边集合中的每一条边(i,j)中的i和j表示该边两端的节点在节点列表中对应的索引。

首先生成graph数据,生成数据这里原论文指出的是先生成一个随机的graph,然后将subgraph插入到随机graph里面得到训练数据,之后再使用暴力搜索进行搜索子图,但是这里为了简单起见,忽略暴力搜索过程。

'''
Generate one graph sample.

First draws `node_num` random nodes and wires them up randomly, then
attaches the subgraph to the random graph via `ins_edge_num` bridge edges.
Subgraph nodes are labeled 1, all others -1.

Input :
    sub_graph : subgraph data as (node_list, edge_list)
    node_num : number of nodes in the random graph
    edge_num : target number of edges in the random graph
    ins_edge_num : number of bridge edges connecting subgraph to random graph
Output :
    (nodes_list, edges_list, label_list) tuple
'''
def genGraph(sub_graph, node_num, edge_num, ins_edge_num):
    # Node features are random integers in [1, 10].
    nodes = list(np.random.randint(low=1, high=11, size=node_num))
    labels = [-1 for _ in nodes]
    edges = []
    # Probability that any given node pair is connected.
    density = edge_num / (node_num * (node_num - 1) / 2)
    for src in range(node_num - 1):
        # Pick distinct higher-indexed endpoints for edges starting at src.
        targets = np.random.choice(a=np.arange(src + 1, node_num),
                                   size=round(density * (node_num - 1 - src)),
                                   replace=False)
        for dst in targets:
            edges.append((src, dst))
    # Attach the subgraph: its nodes are appended after the random nodes.
    nodes += list(sub_graph[0])
    labels += [1 for _ in sub_graph[0]]
    heads = np.random.choice(a=np.arange(node_num), size=ins_edge_num)
    tails = np.random.choice(a=np.arange(node_num, len(nodes)), size=ins_edge_num)
    edges += list(zip(heads, tails))
    # Subgraph-internal edges, shifted by the random graph's node count.
    edges += [(u + node_num, v + node_num) for (u, v) in sub_graph[1]]
    return (nodes, edges, labels)
    
'''
Definition of the target subgraph and generation of the dataset.
'''
np.random.seed(0)
N = 600          # number of graphs in the dataset
node_nums = [5, 10, 15, 20]
ins_nums = [4, 8, 12, 16]


# Subgraph to be matched, as (Node_list, Edge_list).
sub_graph = ([1,5,5,8],[(0,1),(0,3),(1,3),(2,3)])
dataset = []
for _ in range(N):
    n_nodes = random.choice(node_nums)    # node_num for this sample
    n_insert = random.choice(ins_nums)    # ins_edge_num for this sample
    # Edge count is fixed at twice the node count.
    dataset.append(genGraph(sub_graph, n_nodes, 2 * n_nodes, n_insert))

然后构建GCN block和GCN模型

'''
Sum-aggregation block (same as in the linear GNN).
'''
class AggrSum(nn.Module):
    """Sum rows of a message matrix into their target nodes.

    Forward inputs:
        H : (E, s) message matrix, one row per edge.
        X_node : (E, ) target-node index for each row of H.
        node_num : total number of nodes V.
    Returns:
        (V, s) tensor whose row v is the sum of H rows with X_node == v.
    """
    def __init__(self):
        super(AggrSum, self).__init__()

    def forward(self, H, X_node, node_num):
        # Build a (V, E) one-hot mask: mask[v, e] == 1 iff X_node[e] == v.
        mask = torch.stack([X_node] * node_num, 0)
        # FIX: torch.arange replaces the deprecated torch.range
        # (torch.range has an inclusive end and has been removed).
        mask = mask.float() - torch.unsqueeze(torch.arange(node_num).float(), 1)
        mask = (mask == 0).float()
        # (V, E) @ (E, s) -> (V, s)
        return torch.mm(mask, H)

'''
GCN convolution block.
Initialize :
Input :
    in_channel : (int) input node-feature dimension
    out_channel : (int) output node-feature dimension
Forward :
Input :
    x : (Tensor) node feature matrix, shape (N, in_channel), N = node count
    edge_index : (Tensor) edge matrix, shape (2, E), E = edge count
Output :
    out : (Tensor) new feature matrix, shape (N, out_channel)
'''
class GCNConv(nn.Module):
    def __init__(self, in_channel, out_channel):
        super(GCNConv, self).__init__()
        self.linear = nn.Linear(in_channel, out_channel)

    def forward(self, x, edge_index):
        node_num = x.shape[0]
        # Add self-connect edges so every node keeps its own features
        # (also guarantees degree >= 1, so deg**-0.5 below is finite).
        edge_index = self.addSelfConnect(edge_index, node_num)

        # Apply linear transform.
        x = self.linear(x)

        # Symmetric normalization: 1 / sqrt(deg(i) * deg(j)) per edge.
        row, col = edge_index
        deg = self.calDegree(row, node_num).float()
        deg_inv_sqrt = deg.pow(-0.5)  # (N, )
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

        # Per-edge messages from the source-feature side.
        messages = norm.view(-1, 1) * torch.index_select(x, dim=0, index=col)
        # FIX: sparse aggregation via index_add_ replaces the dense (N, E)
        # one-hot mask matmul (AggrSum), avoiding O(N*E) memory/compute.
        out = torch.zeros(node_num, x.shape[1], dtype=x.dtype, device=x.device)
        out.index_add_(0, row, messages)
        return out

    def calDegree(self, edges, num_nodes):
        # Degree = occurrence count of each node index in `edges` (row side).
        ind, deg = np.unique(edges.cpu().numpy(), return_counts=True)
        deg_tensor = torch.zeros((num_nodes, ), dtype=torch.long)
        deg_tensor[ind] = torch.from_numpy(deg)
        return deg_tensor.to(edges.device)

    def addSelfConnect(self, edge_index, num_nodes):
        # FIX: torch.arange replaces the deprecated torch.range
        # (inclusive end, removed in current PyTorch).
        selfconn = torch.stack([torch.arange(num_nodes, dtype=torch.long)] * 2,
                               dim=0).to(edge_index.device)
        return torch.cat(tensors=[edge_index, selfconn],
                         dim=1)

最后进行训练

'''
Model and optimizer setup for training.
'''
# CPU only here; switch to cuda by using torch.device('cuda' if
# torch.cuda.is_available() else 'cpu').
device = torch.device('cpu')
model = Net(10, 2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

'''
Model evaluation function.
'''
def evalModel(model, dataset):
    """Return the mean per-graph node-classification accuracy of `model`
    over `dataset` (an iterable of graph tuples accepted by getInput)."""
    # BUG FIX: acc_list was re-initialized inside the loop, so the function
    # returned only the LAST graph's accuracy instead of the average.
    acc_list = []
    with torch.no_grad():  # no gradients needed during evaluation
        for graph in dataset:
            x, edge_index, y = getInput(graph)
            x = torch.from_numpy(x).float()
            edge_index = torch.from_numpy(edge_index).long()
            y[y < 0] = 0  # map labels {-1, 1} -> class ids {0, 1}
            y = torch.from_numpy(y).long()

            _, pred = model(x, edge_index).max(dim=1)
            acc_list.append(float(pred.eq(y).sum().item()) / y.shape[0])
    return sum(acc_list) / len(acc_list)

for epoch in range(200):
    # Train on the first 400 graphs; the rest are held out for evaluation.
    for step, graph in enumerate(dataset[:400]):
        # Convert the raw graph tuple into model-ready tensors.
        x, edge_index, y = getInput(graph)
        x = torch.from_numpy(x).float().to(device)
        edge_index = torch.from_numpy(edge_index).long().to(device)
        y[y < 0] = 0  # map labels {-1, 1} -> class ids {0, 1}
        y = torch.from_numpy(y).long().to(device)

        model.train()
        optimizer.zero_grad()

        # Forward pass: per-node class logits, shape (N, 2).
        out = model(x, edge_index)
        loss = F.cross_entropy(out, y)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # Training accuracy on this graph.
        _, pred = out.cpu().detach().max(dim=-1)  # (N, )
        y = y.cpu().detach()
        correct = float(pred.eq(y).sum().item())
        acc = correct / pred.shape[0]
        # FIX: loss.item() replaces deprecated loss.cpu().detach().data.item().
        print('[Epoch {}/200, step {}/400] Loss {:.4f}, train acc {:.4f}'.format(epoch, step, loss.item(), acc))

    # Evaluation on test data every 10 epochs
    if (epoch+1) % 10 == 0:
        model.eval()
        print('Accuracy: {:.4f}'.format(evalModel(model, dataset[400:])))

训练的结果显示如下

[Epoch 0/200, step 0/400] Loss 0.7414, train acc 0.2105
[Epoch 0/200, step 1/400] Loss 0.7127, train acc 0.2857
[Epoch 0/200, step 2/400] Loss 0.7048, train acc 0.4444
...
[Epoch 9/200, step 398/400] Loss 0.3575, train acc 0.9583
[Epoch 9/200, step 399/400] Loss 0.4529, train acc 0.8571
Accuracy: 0.9583
[Epoch 10/200, step 0/400] Loss 0.3457, train acc 0.9474
[Epoch 10/200, step 1/400] Loss 0.3342, train acc 1.0000
...

你可能感兴趣的:(代码实现tan,graph,model,for,classification)