代码可能写得比较丑陋，先实现功能，求轻喷；如有错误，请多多指教。
导入库、全局变量
import snap
import numpy as np
import matplotlib.pyplot as plt
import random
# Setup: module-level handles for the three networks compared in Q1.
# They start as None, are assigned inside Q1_1() (through its `global`
# statement) and are read afterwards by Q1_2().
erdosRenyi = None
smallWorld = None
collabNet = None
def genErdosRenyi(N=5242, E=14484):
    """
    :param - N: number of nodes
    :param - E: number of edges

    return type: snap.PUNGraph
    return: Erdos-Renyi graph with N nodes and E edges

    Nodes receive the IDs 1..N. Pairs of distinct nodes are drawn uniformly
    at random until E different edges have been inserted.

    raises: ValueError if E is larger than the number of distinct node pairs
            (N * (N - 1) / 2); the sampling loop could never terminate then.
    """
    # Validate first so an impossible request fails fast instead of spinning.
    max_edges = N * (N - 1) // 2 if N > 1 else 0
    if E > max_edges:
        raise ValueError(
            "cannot place %d edges between %d nodes (maximum is %d)"
            % (E, N, max_edges)
        )

    Graph = snap.TUNGraph.New()

    # generate nodes
    node_ids = range(1, N + 1)
    for node_id in node_ids:
        Graph.AddNode(node_id)

    # generate edges
    edge_cnt = 0
    while edge_cnt < E:
        # Sampling straight from the range avoids rebuilding an N-element
        # list per draw; sample() already guarantees two distinct IDs.
        src, dst = random.sample(node_ids, 2)
        # Count the edge only when AddEdge returns -1, which this code treats
        # as "newly inserted" (an already existing edge is expected to yield
        # a different value -- see SNAP's TUNGraph.AddEdge documentation).
        if Graph.AddEdge(src, dst) == -1:
            edge_cnt += 1

    return Graph
def genCircle(N=5242):
    """
    :param - N: number of nodes

    return type: snap.PUNGraph
    return: Circle graph with N nodes and N edges. The nodes (IDs 0..N-1)
            are arranged on a ring and every node is joined to its two
            direct neighbours.
    """
    ring = snap.TUNGraph.New()

    # Create the nodes 0..N-1.
    for node_id in range(N):
        ring.AddNode(node_id)

    # Join every node to its successor; the last node wraps around to node 0.
    successor_pairs = [(node_id, (node_id + 1) % N) for node_id in range(N)]
    for src, dst in successor_pairs:
        ring.AddEdge(src, dst)

    return ring
def connectNbrOfNbr(Graph, N=5242):
    """
    :param - Graph: snap.PUNGraph object representing a circle graph on N nodes
    :param - N: number of nodes

    return type: snap.PUNGraph
    return: The same Graph object after one AddEdge call per node, linking
            node i to node (i + 2) mod N, i.e. to the neighbour of its
            neighbour on the ring.
    """
    # Pair every node with the node two steps further along the ring.
    second_hop_pairs = ((node_id, (node_id + 2) % N) for node_id in range(N))
    for src, dst in second_hop_pairs:
        Graph.AddEdge(src, dst)
    return Graph
def connectRandomNodes(Graph, M=4000):
    """
    :param - Graph: snap.PUNGraph object representing an undirected graph
    :param - M: number of edges to be added

    return type: snap.PUNGraph
    return: Graph object with additional M edges added by connecting M randomly
            selected pairs of nodes not already connected.

    Node IDs are assumed to be 0..Graph.GetNodes()-1, matching the graphs
    built by genCircle().

    raises: ValueError if M is larger than the number of distinct node pairs.
            NOTE: edges already present reduce the real capacity further; that
            case is not detected here and would still loop indefinitely.
    """
    N = Graph.GetNodes()

    # Reject requests that can never be satisfied rather than looping forever.
    max_edges = N * (N - 1) // 2 if N > 1 else 0
    if M > max_edges:
        raise ValueError(
            "cannot add %d edges to a graph with %d nodes (at most %d pairs)"
            % (M, N, max_edges)
        )

    # generate edges
    node_ids = range(N)
    edge_cnt = 0
    while edge_cnt < M:
        # sample() returns two distinct IDs, so no self-loop check is needed,
        # and drawing from the range avoids building a list on every draw.
        src, dst = random.sample(node_ids, 2)
        # Only a return value of -1 is counted as a newly inserted edge;
        # pairs that were already connected are simply redrawn.
        if Graph.AddEdge(src, dst) == -1:
            edge_cnt += 1

    return Graph
def genSmallWorld(N=5242, E=14484):
    """
    :param - N: number of nodes
    :param - E: number of edges

    return type: snap.PUNGraph
    return: Small-World graph with N nodes and E edges

    The graph is a ring (genCircle) plus neighbour-of-neighbour links
    (connectNbrOfNbr); the remaining edges up to E are added between random
    node pairs (connectRandomNodes). With the default arguments this is
    5242 + 5242 lattice edges plus 4000 random edges.

    raises: ValueError if E is smaller than the number of lattice edges, so
            the requested total cannot be met.
    """
    Graph = genCircle(N)
    Graph = connectNbrOfNbr(Graph, N)

    # Derive the random-edge count from E instead of a fixed 4000. Asking the
    # graph for its edge count (rather than assuming 2 * N) stays correct for
    # tiny rings where the two lattice passes overlap.
    lattice_edges = Graph.GetEdges()
    if E < lattice_edges:
        raise ValueError(
            "E=%d is smaller than the %d edges of the ring lattice"
            % (E, lattice_edges)
        )

    Graph = connectRandomNodes(Graph, E - lattice_edges)
    return Graph
def loadCollabNet(path):
    """
    :param - path: path to edge list file

    return type: snap.PUNGraph
    return: Graph loaded from the edge list at `path`, with self edges removed
    """
    # Load graph from the tab-separated edge list at path
    # (source IDs in column 0, destination IDs in column 1).
    Graph = snap.LoadEdgeList(snap.PUNGraph, path, 0, 1, '\t')

    # Collect the self edges first and delete them afterwards, so the graph is
    # never modified while its edge iterator is still being consumed.
    self_loop_nodes = [
        EI.GetSrcNId()
        for EI in Graph.Edges()
        if EI.GetSrcNId() == EI.GetDstNId()
    ]
    for node_id in self_loop_nodes:
        Graph.DelEdge(node_id, node_id)

    return Graph
绘制度的分布直方图
def getDataPointsToPlot(Graph):
    """
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return values:
    X: list of degrees, every integer from 0 up to the maximum degree
    Y: list of frequencies: Y[i] = fraction of nodes with degree X[i]
       (0 for degrees that do not occur)

    An empty graph yields two empty lists.
    """
    N = Graph.GetNodes()

    # Traverse the nodes and count how many share each degree.
    deg_counts = {}
    for NI in Graph.Nodes():
        deg = NI.GetDeg()
        deg_counts[deg] = deg_counts.get(deg, 0) + 1

    # No nodes: nothing to plot (also avoids max() of an empty sequence and a
    # division by zero below).
    if not deg_counts:
        return [], []

    X = list(range(max(deg_counts) + 1))
    Y = [deg_counts.get(deg, 0) / N for deg in X]
    return X, Y
def Q1_1():
    """
    Code for HW1 Q1.1

    Builds the Erdos-Renyi, small-world and collaboration networks, stores
    them in the module-level globals and draws their degree distributions on
    one log-log plot.
    """
    global erdosRenyi, smallWorld, collabNet

    erdosRenyi = genErdosRenyi(5242, 14484)
    smallWorld = genSmallWorld(5242, 14484)
    collabNet = loadCollabNet("ca-GrQc.txt")

    # One entry per curve: the network and the keyword arguments for loglog.
    curves = (
        (erdosRenyi, {'color': 'y', 'label': 'Erdos Renyi Network'}),
        (smallWorld, {'linestyle': 'dashed', 'color': 'r',
                      'label': 'Small World Network'}),
        (collabNet, {'linestyle': 'dotted', 'color': 'b',
                     'label': 'Collaboration Network'}),
    )
    for network, line_kwargs in curves:
        degrees, fractions = getDataPointsToPlot(network)
        plt.loglog(degrees, fractions, **line_kwargs)

    plt.xlabel('Node Degree (log)')
    plt.ylabel('Proportion of Nodes with a Given Degree (log)')
    plt.title('Degree Distribution of Erdos Renyi, Small World, and Collaboration Networks')
    plt.legend()
    plt.show()
# Execute code for Q1.1: builds the three networks (kept in the module-level
# globals) and shows their degree-distribution plot.
Q1_1()
# Problem 1.2 - Clustering Coefficient
def calcClusteringCoefficientSingleNode(Node, Graph):
    """
    :param - Node: node from snap.PUNGraph object. Graph.Nodes() will give an
                   iterable of nodes in a graph
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return type: float
    returns: local clustering coefficient of Node, i.e. the number of edges
             among its neighbours divided by deg * (deg - 1) / 2; 0.0 when
             the node has fewer than two neighbours
    """
    degree = Node.GetDeg()
    if degree < 2:
        return 0.0

    neighbours = list(Node.GetOutEdges())

    # Visit every unordered pair of neighbours once and count the linked ones.
    linked_pairs = sum(
        1
        for pos, first in enumerate(neighbours)
        for second in neighbours[pos + 1:]
        if Graph.IsEdge(first, second)
    )

    return (2 * linked_pairs) / (degree * (degree - 1))
def calcClusteringCoefficient(Graph):
    """
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return type: float
    returns: clustering coefficient of Graph, computed as the mean of the
             local clustering coefficients of all nodes; 0.0 for a graph
             without nodes
    """
    node_cnt = Graph.GetNodes()
    # An empty graph has no local coefficients to average; return 0.0 rather
    # than dividing by zero.
    if node_cnt == 0:
        return 0.0

    # Traverse the nodes and accumulate their local clustering coefficients.
    total = 0.0
    for NI in Graph.Nodes():
        total += calcClusteringCoefficientSingleNode(NI, Graph)

    return total / node_cnt
def Q1_2():
    """
    Code for Q1.2

    Computes the clustering coefficient of the three module-level networks
    and prints one line per network.
    """
    # Output template paired with the network it reports on.
    reports = (
        ('Clustering Coefficient for Erdos Renyi Network: %f', erdosRenyi),
        ('Clustering Coefficient for Small World Network: %f', smallWorld),
        ('Clustering Coefficient for Collaboration Network: %f', collabNet),
    )

    # Compute every coefficient before printing anything.
    coefficients = [calcClusteringCoefficient(network) for _, network in reports]

    for (template, _), coefficient in zip(reports, coefficients):
        print(template % coefficient)
# Execute code for Q1.2: prints the clustering coefficient of each network.
# It reads the module-level graphs, so Q1_1() must have run beforehand.
Q1_2()
输出结果为:
Clustering Coefficient for Erdos Renyi Network: 0.000796
Clustering Coefficient for Small World Network: 0.283582
Clustering Coefficient for Collaboration Network: 0.529636
# 2.1 Basic Features
# Load the graph G provided in the bundle. The file name is a relative path,
# so "hw1-q2.graph" has to be in the current working directory. G is a
# module-level global used by the feature code below.
G = snap.TUNGraph.Load(snap.TFIn("hw1-q2.graph"))
def cos_sim(x, y):
    """
    :param - x: 1D numpy array (any array-like is accepted and flattened)
    :param - y: 1D numpy array (any array-like is accepted and flattened)

    return type: numpy.float64
    return: cosine similarity of two nodes according to their feature vectors;
            0.0 if either vector has zero length
    """
    # Convert and flatten up front so that a (1, d) row and a (d,) vector are
    # handled the same way, using the plain Euclidean vector norm.
    x = np.asarray(x).ravel()
    y = np.asarray(y).ravel()

    norm_x = np.linalg.norm(x)
    norm_y = np.linalg.norm(y)

    # The cosine is undefined for a zero vector; report "no similarity".
    if norm_x == 0 or norm_y == 0:
        return np.float64(0.0)

    return np.vdot(x, y) / (norm_x * norm_y)
# calculate basic feature of node v
def cal_basic_feature(NId, Graph):
    """
    :param - NId: int, node id
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return type: numpy array of shape (1, 3)
    return: basic feature vector for the node with ID NId:
            [0] the degree of the node,
            [1] the number of edges inside its egonet (the subgraph induced
                by the node and its neighbours),
            [2] the number of edges between the egonet and the rest of the
                graph
    """
    Node = Graph.GetNI(NId)
    neighbor_ids = list(Node.GetOutEdges())

    # Egonet membership as a set: O(1) lookups, and it contains the node
    # itself so that edges leading back to it are not counted as leaving.
    egonet_ids = set(neighbor_ids)
    egonet_ids.add(NId)

    # Induce the egonet on the graph that was passed in (not on a global).
    EIdV = snap.TIntV()
    EIdV.Add(NId)
    for Id in neighbor_ids:
        EIdV.Add(Id)
    SubGraph = snap.GetSubGraph(Graph, EIdV)

    # Count the edges that start at a neighbour and end outside the egonet.
    # The node's own edges all end at neighbours, so they never qualify.
    boundary_edges = 0
    for Id in neighbor_ids:
        for other in Graph.GetNI(Id).GetOutEdges():
            if other not in egonet_ids:
                boundary_edges += 1

    feature = np.zeros((1, 3))
    feature[0][0] = Node.GetDeg()
    feature[0][1] = SubGraph.GetEdges()
    feature[0][2] = boundary_edges
    return feature
# compute the basic feature vector for the node with ID9
feature = cal_basic_feature(9, G)
print('ID9的基本特征为', feature)

# report the 5 nodes that are most similar to node 9 (excluding node 9)
# Row i of `features` belongs to the i-th node yielded by G.Nodes(); keep the
# matching node IDs so that rows can be translated back into IDs.
node_ids = [Ni.GetId() for Ni in G.Nodes()]
features = np.zeros((G.GetNodes(), 3))
for i, NId in enumerate(node_ids):
    features[i] = cal_basic_feature(NId, G)

sim = np.zeros(G.GetNodes())
for i in range(G.GetNodes()):
    sim[i] = cos_sim(feature, features[i])

# Rank by descending similarity and drop node 9 by its ID instead of assuming
# it is ranked first (other nodes can tie with it at similarity 1.0).
sim_sort_arg = [node_ids[i] for i in np.argsort(sim)[::-1] if node_ids[i] != 9]
print('前5个与ID9基本特征最相似的结点ID为', sim_sort_arg[:5])
输出结果:
ID9的基本特征为 [[ 6. 10. 7.]]
前5个与ID9基本特征最相似的结点ID为 [415, 836, 85, 68, 1222]
前5个与ID9迭代特征最相似的结点ID为 [973, 415, 537, 496, 25]
# 2.2 Recursive Features
# expand node u's feature
def expand_feature(NId, Graph, features):
    """
    :param - NId: int, node id (also used as the row index into `features`)
    :param - Graph: snap.PUNGraph object representing an undirected graph
    :param - features: feature vectors of all nodes; each row is laid out as
                       [current features | mean slot | sum slot], three parts
                       of equal width

    return type: numpy array
    return: expanded feature vector of node with ID NId. The mean and the sum
            of the neighbours' current features are added into the second and
            third part of a copy of the node's row; `features` itself is not
            modified. A node without neighbours gets its row back unchanged.
    """
    features = np.asarray(features)
    neighbor_ids = list(Graph.GetNI(NId).GetOutEdges())

    # Work on a copy so the caller's array is never changed through a view.
    expanded = np.array(features[NId], dtype=float)
    width = expanded.shape[-1] // 3

    if neighbor_ids:
        # Sum the neighbours' current features in one vectorised step; the
        # mean follows from the same sum.
        neighbor_sum = features[neighbor_ids, :width].sum(axis=0)
        expanded[width:2 * width] += neighbor_sum / len(neighbor_ids)
        expanded[2 * width:] += neighbor_sum

    return expanded
def recursive_features(k, Graph, features):
    """
    :param - k: int, iterations of recursive operation
    :param - Graph: snap.PUNGraph object representing an undirected graph
    :param - features: feature vectors of all nodes

    return type: numpy array
    return: expanded feature vectors of all nodes; every iteration triples
            the number of columns
    """
    expanded = features
    for _ in range(k):
        # Append zero-filled mean and sum slots (twice the current width);
        # they stay 0 for nodes without neighbours.
        row_count, width = expanded.shape[0], expanded.shape[1]
        padding = np.zeros((row_count, width * 2))
        expanded = np.hstack([expanded, padding])

        # Fill the new slots row by row; the row index doubles as node ID.
        for row in range(row_count):
            expanded[row] = expand_feature(row, Graph, expanded)
    return expanded
# Recursive features after two expansion rounds (3 -> 9 -> 27 columns).
r_features = recursive_features(2, G, features)

# Cosine similarity between node 9 (row 9) and every row.
r_sim = np.zeros(G.GetNodes())
for i in range(G.GetNodes()):
    r_sim[i] = cos_sim(r_features[9], r_features[i])

# Rank by descending similarity and remove node 9 explicitly instead of
# assuming it is ranked first (rounding or ties can put another node on top).
r_sim_sort_arg = [int(i) for i in np.argsort(r_sim)[::-1] if i != 9]
print('前5个与ID9迭代特征最相似的结点ID为', r_sim_sort_arg[:5])
画直方图
# 2.3 Role Discovery
# produce a 20-bin histogram to show the distribution of cosine similarity
# between node 9 and any other node in the graph (according to their recursive feature vectors)
# Entry 9 of r_sim is node 9 compared with itself, so it is left out to match
# "any other node".
plt.hist(x=np.delete(r_sim, 9),
         bins=20,
         range=(0, 1),
         rwidth=0.9)
plt.xticks(np.arange(0, 1.01, 0.05), rotation=45)
plt.ylabel('node counts')
plt.xlabel('similarity with ID9')
plt.show()