1. Basic Operations
1.1 Loading the dataset (using KarateClub as an example)
import networkx as nx
G = nx.karate_club_graph()
print(type(G))
# Visualize the graph
nx.draw(G, with_labels=True)
1.2 Average node degree
def average_degree(num_edges, num_nodes):
    # The average degree of a graph is 2 * E / N
    avg_degree = round(2 * num_edges / num_nodes)
    return avg_degree
num_edges = G.number_of_edges()
num_nodes = G.number_of_nodes()
avg_degree = average_degree(num_edges, num_nodes)
print("Average degree of karate club network is {}".format(avg_degree))
1.3 Average clustering coefficient
def average_clustering_coefficient(G):
    # networkx's average_clustering computes this directly
    avg_cluster_coef = round(nx.average_clustering(G), 2)
    return avg_cluster_coef
avg_cluster_coef = average_clustering_coefficient(G)
print("Average clustering coefficient of karate club network is {}".format(avg_cluster_coef))
1.4 PageRank of node 0 after one iteration
# The importance of node j is the sum, over all nodes i pointing to j,
# of (importance of i) / (out-degree of i).
# With a random surfer, beta is the probability of following a link from i to j, so:
# r_j = beta * sum_i (r_i / d_i) + (1 - beta) / N
# Here we run only a single iteration
def one_iter_pagerank(G, beta, r0, node_id):
    r1 = 0
    # Iterate over all neighbors of the given node
    for neighbor in nx.neighbors(G, node_id):
        di = G.degree[neighbor]
        r1 += beta * r0 / di
    r1 += (1 - beta) * (1 / G.number_of_nodes())
    return r1
beta = 0.8
r0 = 1 / G.number_of_nodes()
node = 0
r1 = one_iter_pagerank(G, beta, r0, node)
print("The PageRank value for node 0 after one iteration is {}".format(r1))
# nx.pagerank can also be used, but note that it iterates to convergence and
# returns a dict of final PageRank values, not a single-iteration value
r = nx.pagerank(G, alpha=beta)
print("The converged PageRank value for node 0 is {}".format(r[node]))
1.5 Closeness centrality of node 5
# The closeness centrality of node u is 1 / (sum of shortest-path distances from u to all other nodes)
def closeness_centrality(G, node=5):
    # networkx's shortest_path_length returns the shortest-path distances from `node`
    shortest_path = nx.shortest_path_length(G, source=node)
    sum_length = 0
    for i in range(G.number_of_nodes()):
        sum_length += shortest_path[i]
    closeness = 1 / sum_length
    return closeness
node = 5
closeness = closeness_centrality(G, node=node)
print("The node 5 has closeness centrality {}".format(closeness))
# networkx's closeness_centrality can also be called directly.
# Note that the library normalizes the value (it multiplies by (number of nodes - 1)),
# so we divide by (number of nodes - 1) to match the definition above
closeness = nx.closeness_centrality(G, u=5)
closeness = closeness / (G.number_of_nodes() - 1)
print("The node 5 has closeness centrality {}".format(closeness))
2. Converting the Graph to Tensors
2.1 Get the graph's edge list and convert it to a torch.LongTensor
import torch
# Collect all edges of the graph
def graph_to_edge_list(G):
    edge_list = []
    for edge in G.edges():
        edge_list.append(edge)
    return edge_list
# Convert the edge list to a tensor of shape [2, num_edges]
def edge_list_to_tensor(edge_list):
    edge_index = torch.tensor(edge_list).T
    return edge_index
pos_edge_list = graph_to_edge_list(G)
pos_edge_index = edge_list_to_tensor(pos_edge_list)
print("The pos_edge_index tensor has shape {}".format(pos_edge_index.shape))
2.2 Negative sampling: sample a number of "negative" edges from the graph (a "negative" edge is a node pair that does NOT exist as an edge in the graph)
import random
def sample_negative_edges(G, num_neg_samples):
    neg_edge_list = []
    # For an undirected graph, nx.non_edges yields each absent node pair exactly once
    non_edges_one_side = list(nx.non_edges(G))
    neg_edge_list_indices = random.sample(range(0, len(non_edges_one_side)), num_neg_samples)
    for i in neg_edge_list_indices:
        neg_edge_list.append(non_edges_one_side[i])
    return neg_edge_list
# Sample 78 negative edges
neg_edge_list = sample_negative_edges(G, len(pos_edge_list))
# Transform the negative edge list to tensor
neg_edge_index = edge_list_to_tensor(neg_edge_list)
print("The neg_edge_index tensor has shape {}".format(neg_edge_index.shape))
print("The neg_edge_index : {}".format(neg_edge_index))
# Which of the following edges can be negative ones?
# (an edge can be negative only if it is absent from the graph)
candidate_edges = [('edge_1', (7, 1)), ('edge_2', (1, 33)), ('edge_3', (33, 22)),
                   ('edge_4', (0, 4)), ('edge_5', (4, 2))]
for name, (u, v) in candidate_edges:
    print(name + (" can't" if G.has_edge(u, v) else ' can') + ' be a negative edge')
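As a sanity check on the sampler (a minimal sketch), every sampled negative edge should be absent from the graph; for the karate club graph there are 34*33/2 - 78 = 483 candidate non-edges:
assert all(not G.has_edge(u, v) for u, v in neg_edge_list)
print("Possible negative edges:", len(list(nx.non_edges(G))))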
3. Node Embedding
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
# Initialize a sample embedding layer:
# each of 4 objects gets an embedding, and each embedding is an 8-dimensional vector
emb_sample = nn.Embedding(num_embeddings=4, embedding_dim=8)
print('Sample embedding layer: {}'.format(emb_sample))
# The whole embedding layer is effectively a matrix, with one row per object's embedding.
# embedding.weight.data holds the values of that matrix.
# Select rows by index
ids = torch.LongTensor([1, 3])
print(emb_sample(ids))
# Shape of the weight matrix
shape = emb_sample.weight.data.shape
print(shape)
# Assign new values to the weight matrix
emb_sample.weight.data = torch.ones(shape)
print(emb_sample.weight.data)
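Embedding layers also accept multi-dimensional index tensors, which is how emb(train_edge) is used in section 3.3: an index tensor of shape [2, k] yields embeddings of shape [2, k, embedding_dim]. A minimal sketch (the name `ids_2d` is introduced here):
ids_2d = torch.LongTensor([[0, 1], [2, 3]])
print(emb_sample(ids_2d).shape)  # torch.Size([2, 2, 8])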
3.1 Creating and initializing the Embedding
# Create one embedding per node of the karate club graph.
# create_node_emb is called in the training script of section 3.3.
# (Here the weights are initialized uniformly in [0, 1) with torch.rand;
# any random initialization works.)
def create_node_emb(num_node=34, embedding_dim=16):
    emb = nn.Embedding(num_embeddings=num_node, embedding_dim=embedding_dim)
    emb.weight.data = torch.rand(num_node, embedding_dim)
    return emb
emb = create_node_emb()
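For reproducible runs, it can help to seed PyTorch's RNG before creating the embedding (an optional sketch; the seed value is arbitrary and not part of the original flow):
torch.manual_seed(1)
emb = create_node_emb()
print(emb.weight.data.shape)  # torch.Size([34, 16])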
3.2 Visualizing the Embedding
def visualize_emb(emb):
    # Convert the embedding matrix to numpy and reduce it to 2D with PCA
    X = emb.weight.data.numpy()
    pca = PCA(n_components=2)
    X = pca.fit_transform(X)
    plt.figure(figsize=(6, 6))
    club1_x = []
    club1_y = []
    club2_x = []
    club2_y = []
    # Color each node by its club label
    for node in G.nodes(data=True):
        if node[1]['club'] == 'Mr. Hi':
            club1_x.append(X[node[0]][0])
            club1_y.append(X[node[0]][1])
        else:
            club2_x.append(X[node[0]][0])
            club2_y.append(X[node[0]][1])
    plt.scatter(club1_x, club1_y, color="red", label="Mr. Hi")
    plt.scatter(club2_x, club2_y, color="blue", label="Officer")
    plt.legend()
    plt.show()
visualize_emb(emb)
3.3 Training the Embedding
from torch.optim import SGD
import torch.nn as nn
def accuracy(pred, label):
    # Fraction of predictions (thresholded at 0.5) that match the labels
    accu = ((pred > 0.5) == label).sum().item() / (pred.shape[0])
    return accu
def train(emb, loss_fn, sigmoid, train_label, train_edge):
    epochs = 1000
    learning_rate = 0.1
    optimizer = SGD(emb.parameters(), lr=learning_rate, momentum=0.9)
    for i in range(epochs):
        optimizer.zero_grad()
        # Look up the embeddings of the edges' endpoints: shape [2, num_edges, emb_dim]
        train_node_emb = emb(train_edge)
        # Dot product between the two endpoint embeddings of each node pair
        dot_product_result = train_node_emb[0].mul(train_node_emb[1])
        dot_product_result = torch.sum(dot_product_result, 1)
        # Squash the scores into (0, 1) with a sigmoid
        sigmoid_result = sigmoid(dot_product_result)
        loss_result = loss_fn(sigmoid_result, train_label)
        if i % 50 == 0:
            print(loss_result)
            print(accuracy(sigmoid_result, train_label))
        # Update the embeddings
        loss_result.backward()
        optimizer.step()
loss_fn = nn.BCELoss()
sigmoid = nn.Sigmoid()
# Load the graph
G = nx.karate_club_graph()
num_node = G.number_of_nodes()
num_edge = G.number_of_edges()
# Initialize the embedding
emb = create_node_emb(num_node, embedding_dim=16)
# Collect the graph's edges (positive examples)
pos_edge_list = graph_to_edge_list(G)
# Convert them to a tensor
pos_edge_index = edge_list_to_tensor(pos_edge_list)
neg_edge_list = sample_negative_edges(G, len(pos_edge_list))
neg_edge_index = edge_list_to_tensor(neg_edge_list)
# Generate the positive and negative labels
pos_label = torch.ones(pos_edge_index.shape[1], )
neg_label = torch.zeros(neg_edge_index.shape[1], )
# Concat positive and negative labels into one tensor
train_label = torch.cat([pos_label, neg_label], dim=0)
# Concat positive and negative edges into one tensor
# Since the network is very small, we do not split the edges into val/test sets
train_edge = torch.cat([pos_edge_index, neg_edge_index], dim=1)
train(emb, loss_fn, sigmoid, train_label, train_edge)
visualize_emb(emb)
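After training, the final accuracy can be checked by reusing train's forward pass (a minimal evaluation sketch under torch.no_grad, since no gradients are needed here):
with torch.no_grad():
    train_node_emb = emb(train_edge)
    pred = sigmoid((train_node_emb[0] * train_node_emb[1]).sum(dim=1))
    print("Final accuracy: {:.4f}".format(accuracy(pred, train_label)))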