cs224w作业1

cs224w作业1

  • 准备工作
    • 生成Erdős-Rényi随机图
    • 生成Small-World随机图
    • 导入Real-World Collaboration Network,并去掉self-edges
  • 1. Network Characteristics
    • 1.1 度的分布
    • 1.2 Clustering Coefficient
  • 2. Structural Roles: Rolx and ReFex
    • 2.1 Basic Features
    • 2.2 Recursive Features
    • 2.3 Role Discovery

代码可能写得比较丑陋,先实现功能,求轻喷,有错的话请多多指教

准备工作

导入库、全局变量

import snap
import numpy as np
import matplotlib.pyplot as plt
import random

# Setup
# Module-level graph handles. Q1_1() assigns all three (via `global`) and
# Q1_2() reads them, so Q1_1() has to run before Q1_2().
erdosRenyi = None
smallWorld = None
collabNet = None

生成Erdős-Rényi随机图

def genErdosRenyi(N=5242, E=14484):
    """
    Build an Erdos-Renyi style random graph with a fixed number of edges.

    :param - N: number of nodes
    :param - E: number of edges

    return type: snap.PUNGraph
    return: Erdos-Renyi graph with N nodes (IDs 1..N) and E distinct
        undirected edges; no self-loops are created

    raises ValueError: if E is larger than N * (N - 1) / 2, the number of
        distinct node pairs (the sampling loop could never terminate)
    """
    ############################################################################
    # A simple undirected graph on N nodes holds at most N * (N - 1) / 2 edges.
    # Check this before building anything so an impossible request fails fast
    # instead of spinning forever in the sampling loop below.
    max_edges = N * (N - 1) // 2 if N > 1 else 0
    if E > max_edges:
        raise ValueError(
            "cannot place %d edges on %d nodes (at most %d)" % (E, N, max_edges)
        )

    Graph = snap.TUNGraph.New()

    # generate nodes
    for node_id in range(1, N + 1):
        Graph.AddNode(node_id)

    # generate edges
    # The candidate IDs are a range object created once; random.sample accepts
    # any sequence, so there is no need to rebuild an N-element list per draw.
    node_ids = range(1, N + 1)
    edge_cnt = 0
    while edge_cnt < E:
        # sample() draws without replacement, so src and dst always differ
        src, dst = random.sample(node_ids, 2)

        # Only count the draw when AddEdge reports -1 (the value this code has
        # always treated as "new edge inserted"); duplicates are re-drawn.
        if Graph.AddEdge(src, dst) == -1:
            edge_cnt += 1
    ############################################################################
    return Graph

生成Small-World随机图

def genCircle(N=5242):
    """
    Build a ring graph.

    :param - N: number of nodes

    return type: snap.PUNGraph
    return: graph whose nodes carry the IDs 0..N-1, where every node is
        joined to its successor and the last node wraps around to node 0.
    """
    ring = snap.TUNGraph.New()
    node_ids = list(range(N))

    # first pass: create all nodes
    for node_id in node_ids:
        ring.AddNode(node_id)

    # second pass: join each node to the next one, wrapping at the end
    for node_id in node_ids:
        successor = (node_id + 1) % N
        ring.AddEdge(node_id, successor)

    return ring


def connectNbrOfNbr(Graph, N=5242):
    """
    Join every node of a ring graph to the node two positions ahead of it.

    :param - Graph: snap.PUNGraph object representing a circle graph on N nodes
        (node IDs 0..N-1)
    :param - N: number of nodes

    return type: snap.PUNGraph
    return: the same Graph object, after an AddEdge call for each pair
        (i, (i + 2) mod N)
    """
    node_id = 0
    while node_id < N:
        # two steps ahead on the ring, wrapping past the last node
        two_ahead = (node_id + 2) % N
        Graph.AddEdge(node_id, two_ahead)
        node_id += 1

    return Graph


def connectRandomNodes(Graph, M=4000):
    """
    Add M random edges between node pairs that are not yet connected.

    :param - Graph: snap.PUNGraph object representing an undirected graph;
        endpoints are drawn from the IDs 0..Graph.GetNodes()-1, so the graph
        is expected to use exactly those IDs
    :param - M: number of edges to be added

    return type: snap.PUNGraph
    return: Graph object with additional M edges added by connecting M randomly
        selected pairs of nodes not already connected.

    raises ValueError: if fewer than M unconnected node pairs are left, since
        the sampling loop could then never terminate
    """
    ############################################################################
    N = Graph.GetNodes()

    # Count the pairs that are already joined (self-loops do not occupy a
    # pair) to find out how many new edges can still be placed.
    existing = sum(
        1 for EI in Graph.Edges() if EI.GetSrcNId() != EI.GetDstNId()
    )
    free_pairs = (N * (N - 1) // 2 if N > 1 else 0) - existing
    if M > free_pairs:
        raise ValueError(
            "cannot add %d edges: only %d unconnected node pairs remain"
            % (M, free_pairs)
        )

    # generate edges
    # A range object is a valid population for random.sample; creating it once
    # avoids rebuilding an N-element list on every draw.
    node_ids = range(N)
    edge_cnt = 0
    while edge_cnt < M:
        # sample() draws without replacement, so src and dst always differ
        src, dst = random.sample(node_ids, 2)

        # Only count the draw when AddEdge reports -1 (the value this code has
        # always treated as "new edge inserted"); duplicates are re-drawn.
        if Graph.AddEdge(src, dst) == -1:
            edge_cnt += 1

    ############################################################################
    return Graph


def genSmallWorld(N=5242, E=14484):
    """
    Build a small-world graph: a ring, plus neighbour-of-neighbour edges,
    plus random shortcut edges up to a total of E edges.

    :param - N: number of nodes
    :param - E: number of edges

    return type: snap.PUNGraph
    return: Small-World graph with N nodes and E edges

    raises ValueError: if the ring and neighbour-of-neighbour edges alone
        already exceed E
    """
    Graph = genCircle(N)
    Graph = connectNbrOfNbr(Graph, N)

    # Fill up to E with random edges instead of a fixed 4000, so the E
    # parameter is honoured. With the defaults (N=5242, E=14484) the regular
    # part has 2 * 5242 = 10484 edges, which leaves the original 4000.
    remaining = E - Graph.GetEdges()
    if remaining < 0:
        raise ValueError(
            "E=%d is smaller than the %d edges of the regular ring structure"
            % (E, Graph.GetEdges())
        )

    Graph = connectRandomNodes(Graph, remaining)
    return Graph

导入Real-World Collaboration Network,并去掉self-edges

def loadCollabNet(path):
    """
    Load the collaboration network and strip its self edges.

    :param - path: path to edge list file (tab-separated; source node ID in
        column 0, destination node ID in column 1)

    return type: snap.PUNGraph
    return: Graph loaded from edge list at `path` and self edges removed

    Do not forget to remove the self edges!
    """
    ############################################################################
    # load graph from edge list at path
    Graph = snap.LoadEdgeList(snap.PUNGraph, path, 0, 1, '\t')

    # remove self edges
    # Gather the looped nodes first and delete afterwards, so the graph is
    # never modified while its edge iterator is still being walked.
    self_loop_nodes = [
        EI.GetSrcNId()
        for EI in Graph.Edges()
        if EI.GetSrcNId() == EI.GetDstNId()
    ]
    for node_id in self_loop_nodes:
        Graph.DelEdge(node_id, node_id)
    ############################################################################
    return Graph

1. Network Characteristics

1.1 度的分布

绘制度的分布直方图

def getDataPointsToPlot(Graph):
    """
    Compute the degree distribution of a graph.

    :param - Graph: snap.PUNGraph object representing an undirected graph

    return values:
    X: list of degrees, 0 up to the maximum degree found in the graph
    Y: list of frequencies: Y[i] = fraction of nodes with degree X[i]

    Both lists are empty when the graph has no nodes.
    """
    ############################################################################
    N = Graph.GetNodes()
    if N == 0:
        # no nodes: nothing to count, and max() below would fail on no data
        return [], []

    # traverse the nodes and collect the degrees
    deg_count = {}
    for NI in Graph.Nodes():
        deg = NI.GetDeg()
        deg_count[deg] = deg_count.get(deg, 0) + 1

    # Report every degree from 0 to the maximum, including those no node has,
    # so that X is a contiguous axis.
    X = list(range(max(deg_count) + 1))
    Y = [deg_count.get(deg, 0) / N for deg in X]
    ############################################################################
    return X, Y

def Q1_1():
    """
    Code for HW1 Q1.1

    Builds/loads the three networks into the module-level globals and draws
    their degree distributions on one log-log plot.
    """
    global erdosRenyi, smallWorld, collabNet
    erdosRenyi = genErdosRenyi(5242, 14484)
    smallWorld = genSmallWorld(5242, 14484)
    collabNet = loadCollabNet("ca-GrQc.txt")

    # one entry per curve: the graph and the keyword arguments for its line
    curves = (
        (erdosRenyi,
         {'color': 'y', 'label': 'Erdos Renyi Network'}),
        (smallWorld,
         {'linestyle': 'dashed', 'color': 'r', 'label': 'Small World Network'}),
        (collabNet,
         {'linestyle': 'dotted', 'color': 'b', 'label': 'Collaboration Network'}),
    )
    for graph, line_kwargs in curves:
        degrees, fractions = getDataPointsToPlot(graph)
        plt.loglog(degrees, fractions, **line_kwargs)

    plt.xlabel('Node Degree (log)')
    plt.ylabel('Proportion of Nodes with a Given Degree (log)')
    plt.title('Degree Distribution of Erdos Renyi, Small World, and Collaboration Networks')
    plt.legend()
    plt.show()


# Execute code for Q1.1
# Runs at module level: fills the erdosRenyi / smallWorld / collabNet globals
# and opens the degree-distribution plot.
Q1_1()

结果图
cs224w作业1_第1张图片

1.2 Clustering Coefficient

# Problem 1.2 - Clustering Coefficient

def calcClusteringCoefficientSingleNode(Node, Graph):
    """
    Local clustering coefficient: the share of neighbour pairs of a node
    that are themselves joined by an edge.

    :param - Node: node from snap.PUNGraph object. Graph.Nodes() will give an
                   iterable of nodes in a graph
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return type: float
    returns: local clustering coefficient of Node (0.0 when the node has
        fewer than two neighbours)
    """
    degree = Node.GetDeg()
    if degree < 2:
        return 0.0

    nbr_ids = list(Node.GetOutEdges())

    # visit each unordered neighbour pair once and count the joined ones
    linked_pairs = sum(
        1
        for pos, first in enumerate(nbr_ids)
        for second in nbr_ids[pos + 1:]
        if Graph.IsEdge(first, second)
    )

    return (2 * linked_pairs) / (degree * (degree - 1))


def calcClusteringCoefficient(Graph):
    """
    Average clustering coefficient of a graph.

    :param - Graph: snap.PUNGraph object representing an undirected graph

    return type: float
    returns: clustering coefficient of Graph, i.e. the mean of the local
        clustering coefficients of all nodes (0.0 for a graph without nodes)
    """
    ############################################################################
    node_count = Graph.GetNodes()
    if node_count == 0:
        # avoid dividing by zero on an empty graph
        return 0.0

    # traverse the nodes and add up the local clustering coefficients
    total = sum(
        calcClusteringCoefficientSingleNode(NI, Graph) for NI in Graph.Nodes()
    )
    ############################################################################
    return total / node_count


def Q1_2():
    """
    Code for Q1.2

    Prints the average clustering coefficient of the three module-level
    networks, one line each.
    """
    reports = (
        ('Clustering Coefficient for Erdos Renyi Network: %f', erdosRenyi),
        ('Clustering Coefficient for Small World Network: %f', smallWorld),
        ('Clustering Coefficient for Collaboration Network: %f', collabNet),
    )

    # compute everything first, print afterwards
    coefficients = [calcClusteringCoefficient(graph) for _, graph in reports]

    for (template, _), value in zip(reports, coefficients):
        print(template % value)


# Execute code for Q1.2
# Reads the graph globals that Q1_1() assigned, so Q1_1() must have run first.
Q1_2()

输出结果为:

Clustering Coefficient for Erdos Renyi Network: 0.000796
Clustering Coefficient for Small World Network: 0.283582
Clustering Coefficient for Collaboration Network: 0.529636

2. Structural Roles: Rolx and ReFex

2.1 Basic Features

# 2.1 Basic Features
# load the graph G provided in the bundle
# The file name is a relative path, so it is looked up in the current working
# directory. G is a module-level global used by the rest of section 2.
G = snap.TUNGraph.Load(snap.TFIn("hw1-q2.graph"))


def cos_sim(x, y):
    """
    Cosine similarity of two feature vectors.

    :param - x: numpy array (or array-like); flattened to 1D before use
    :param - y: numpy array (or array-like); flattened to 1D before use

    return type: numpy.float64
    return: cosine similarity of two nodes according to their feature vectors;
        0 if either vector has zero length
    """
    # Flatten first: np.linalg.norm(a, 2) on a 2-D array is the matrix
    # (spectral) norm, which does not match the flattened dot product that
    # np.vdot computes. On 1-D data both are the Euclidean norm.
    x = np.asarray(x).ravel()
    y = np.asarray(y).ravel()

    norm_x = np.linalg.norm(x)
    norm_y = np.linalg.norm(y)
    if norm_x == 0 or norm_y == 0:
        # similarity is undefined for a zero vector; report 0 by convention
        return 0

    return np.vdot(x, y) / (norm_x * norm_y)


# calculate basic feature of node v
def cal_basic_feature(NId, Graph):
    """
    Compute the three basic structural features of a node.

    :param - NId: int, node id
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return type: numpy array of shape (1, 3)
    return: basic feature of vector for the node with ID NId:
        [0][0] the degree of the node,
        [0][1] the number of edges inside its egonet (the subgraph induced
               by the node and its neighbours),
        [0][2] the number of edges between the egonet and the rest of the
               graph
    """
    Node = Graph.GetNI(NId)
    neighbor_ids = list(Node.GetOutEdges())
    feature = np.zeros((1, 3))

    # Induce the egonet on the Graph argument (not on the module-level G), so
    # the function works for whichever graph it is given.
    egonet_ids = snap.TIntV()
    egonet_ids.Add(NId)
    for nbr_id in neighbor_ids:
        egonet_ids.Add(nbr_id)
    SubGraph = snap.GetSubGraph(Graph, egonet_ids)

    # the degree of v
    feature[0][0] = Node.GetDeg()

    # the number of edges in the egonet of v
    feature[0][1] = SubGraph.GetEdges()

    # the number of edges that enter or leave the egonet of v
    # Membership must include v itself: an edge from a neighbour back to v
    # lies inside the egonet and must not be counted as leaving it. All of
    # v's own edges end at neighbours, so only the neighbours' edges can
    # cross the boundary. A set keeps each membership test O(1).
    egonet_members = set(neighbor_ids)
    egonet_members.add(NId)
    boundary_edges = 0
    for nbr_id in neighbor_ids:
        for other_id in Graph.GetNI(nbr_id).GetOutEdges():
            if other_id not in egonet_members:
                boundary_edges += 1
    feature[0][2] = boundary_edges

    return feature


# compute the basic feature vector for the node with ID9
feature = cal_basic_feature(9, G)
print('ID9的基本特征为', feature)

# report the 5 nodes that are most similar to node 9 (excluding node 9)
# Rows of `features` are addressed by node ID, because the ranking below
# prints row indices as node IDs and expand_feature() looks rows up by ID.
# NOTE(review): this assumes G's node IDs are exactly 0..G.GetNodes()-1.
features = np.zeros((G.GetNodes(), 3))
for Ni in G.Nodes():
    NId = Ni.GetId()
    features[NId] = cal_basic_feature(NId, G)

sim = np.zeros(G.GetNodes())
for i in range(G.GetNodes()):
    sim[i] = cos_sim(feature, features[i])

# indices sorted by descending similarity
sim_sort_arg = list(np.argsort(sim))
sim_sort_arg.reverse()

# Drop node 9 by ID instead of assuming it is ranked first: another node with
# an equal similarity could take the first position.
top5_basic = [int(i) for i in sim_sort_arg if i != 9][:5]
print('前5个与ID9基本特征最相似的结点ID为', top5_basic)

输出结果:

ID9的基本特征为 [[ 6. 10.  7.]]
前5个与ID9基本特征最相似的结点ID为 [415, 836, 85, 68, 1222]
前5个与ID9迭代特征最相似的结点ID为 [973, 415, 537, 496, 25]

2.2 Recursive Features

# 2.2 Recursive Features
# expand node u's feature
def expand_feature(NId, Graph, features):
    """
    Fill in the neighbour-aggregate part of one node's feature row.

    Each row of `features` has three equal parts: the node's own features,
    then the mean and then the sum of its neighbours' own features.

    :param - Nid: int, node id (also the row index into `features`)
    :param - Graph: snap.PUNGraph object representing an undirected graph
    :param - features: feature vectors of all nodes; the row of node NId is
        updated in place

    return type: numpy array
    return: expanded feature vector of node with ID NId (a view of
        features[NId])
    """
    Node = Graph.GetNI(NId)
    neighbor_ids = list(Node.GetOutEdges())
    feature = features[NId]
    base_len = feature.shape[-1] // 3

    if neighbor_ids:
        # Only the first third of each neighbour row is read, so writing into
        # the last two thirds of this row never affects another node's result.
        nbr_base = features[neighbor_ids, :base_len]
        nbr_sum = nbr_base.sum(axis=0)

        # Assign rather than accumulate, so the result does not depend on the
        # caller having zeroed these columns beforehand.
        feature[base_len:2 * base_len] = nbr_sum / len(neighbor_ids)
        feature[2 * base_len:] = nbr_sum
    else:
        # no neighbours: mean and sum are defined as 0
        feature[base_len:] = 0

    return feature


def recursive_features(k, Graph, features):
    """
    Grow the feature matrix by k rounds of neighbour aggregation.

    :param - k: int, iterations of recursive operation
    :param - Graph: snap.PUNGraph object representing an undirected graph
    :param - features: feature vectors of all nodes

    return type: numpy array
    return: expanded feature vectors of all nodes; each round triples the
        number of columns
    """
    for _ in range(k):
        row_count = features.shape[0]

        # Append two zero blocks as wide as the current features. Rows that
        # expand_feature leaves untouched keep these zeros as mean & sum.
        zero_block = np.zeros((row_count, 2 * features.shape[1]))
        features = np.hstack((features, zero_block))

        # the row index doubles as the node id passed to expand_feature
        for row in range(row_count):
            features[row] = expand_feature(row, Graph, features)

    return features


# two rounds of recursive aggregation on top of the basic features
r_features = recursive_features(2, G, features)

# cosine similarity between node 9 and every node (row index = node ID)
r_sim = np.zeros(G.GetNodes())
for i in range(G.GetNodes()):
    r_sim[i] = cos_sim(r_features[9], r_features[i])

# indices sorted by descending similarity
r_sim_sort_arg = list(np.argsort(r_sim))
r_sim_sort_arg.reverse()

# Drop node 9 by ID instead of assuming it is ranked first: another node with
# an equal similarity could take the first position.
top5_recursive = [int(i) for i in r_sim_sort_arg if i != 9][:5]
print('前5个与ID9迭代特征最相似的结点ID为', top5_recursive)

2.3 Role Discovery

画直方图

# 2.3 Role Discovery
# produce a 20-bin histogram to show the distribution of cosine similarity
# between node 9 and any other node in the graph (according to their recursive feature vectors)
# Leave out node 9's similarity to itself, so the histogram really covers
# only the *other* nodes.
other_sim = r_sim[np.arange(r_sim.size) != 9]
plt.hist(x=other_sim,
         bins=20,
         range=(0, 1),
         rwidth=0.9)
plt.xticks(np.arange(0, 1.01, 0.05), rotation=45)
plt.ylabel('node counts')
plt.xlabel('similarity with ID9')
plt.show()

cs224w作业1_第2张图片

你可能感兴趣的:(CS224w)