作为一种分裂的层次聚类方法,GN算法通过不断从原图中删除边从而达到生成分裂树的目的(从而形成社团)
那么关键点就在于,如何确定删边的标准(本文暂仅详述步骤,对这一点不作详细解答)
通过不断删除边介数最大的边并循环(每次都重新计算),直到图中没有边存在
步骤:
python
# -*- coding: utf-8 -*-
import networkx as nx
import matplotlib.pyplot as plt
import sys
sys.path.append('../')
class GN:
    """Girvan-Newman community detection.

    Repeatedly removes the edge with the highest edge-betweenness
    centrality (recomputed after every removal) until no edges remain,
    and keeps the partition (set of connected components) that maximizes
    the modularity Q, evaluated on the original, unmodified graph.
    """

    def __init__(self, G):
        # Untouched copy used for modularity evaluation and GML output;
        # self.G is destructively modified by run().
        self.G_copy = G.copy()
        self.G = G
        # Start with a single community containing every node.
        self.partition = [[n for n in G.nodes()]]
        # History of distinct Q values seen, and the best one so far.
        self.all_Q = [0.0]
        self.max_Q = 0.0
        # community index -> list of member nodes, for the best partition.
        self.zidian = {0: [0]}

    # Using max_Q to divide communities
    def run(self):
        """Build the full dendrogram; return the best (max-Q) partition."""
        # Until there is no edge in the graph.
        while self.G.number_of_edges() != 0:
            # Find the edge with the largest betweenness centrality.
            edge = max(nx.edge_betweenness_centrality(self.G).items(),
                       key=lambda item: item[1])[0]
            # Remove that edge and re-read the connected components.
            self.G.remove_edge(edge[0], edge[1])
            components = [list(c) for c in nx.connected_components(self.G)]
            # Only evaluate when the removal actually split a community.
            if len(components) != len(self.partition):
                # Modularity is computed against the ORIGINAL graph.
                cur_Q = self.cal_Q(components, self.G_copy)
                if cur_Q not in self.all_Q:
                    self.all_Q.append(cur_Q)
                if cur_Q > self.max_Q:
                    self.max_Q = cur_Q
                    self.partition = components
                    # Rebuild the index->community map from scratch so no
                    # stale entries from a previous (smaller) partition remain.
                    self.zidian = {i: comm for i, comm in enumerate(components)}
        print('-----------the Divided communities and the Max Q-----------')
        print('Max_Q:', self.max_Q)
        print('The number of Communites:', len(self.partition))
        print("Communites:", self.partition)
        return self.partition

    def cal_Q(self, partition, G):
        """Compute the modularity Q = sum_c (e_c - a_c**2) of *partition* on *G*.

        e_c is the fraction of edges with both endpoints inside community c;
        a_c is the fraction of edge endpoints attached to community c.
        """
        m = G.number_of_edges()
        if m == 0:
            # Modularity is undefined for an edgeless graph; return 0.0
            # instead of raising ZeroDivisionError.
            return 0.0
        a = []
        e = []
        for community in partition:
            # a_c: total degree of the community over 2m.
            t = sum(len(list(G.neighbors(node))) for node in community)
            a.append(t / (2 * m))
        for community in partition:
            # e_c: the ordered double loop counts every intra-community edge
            # twice (u->v and v->u), hence the division by 2m.
            t = 0.0
            for u in community:
                for v in community:
                    if G.has_edge(u, v):
                        t += 1.0
            e.append(t / (2 * m))
        return sum(ei - ai ** 2 for ei, ai in zip(e, a))

    def add_group(self):
        """Attach a 'group' attribute (community index) to each node of the copy."""
        num = 0
        nodegroup = {}
        for partition in self.partition:
            for node in partition:
                nodegroup[node] = {'group': num}
            num = num + 1
        nx.set_node_attributes(self.G_copy, nodegroup)

    def to_gml(self):
        """Write the annotated original graph to 'outtoGN.gml'."""
        nx.write_gml(self.G_copy, 'outtoGN.gml')
if __name__ == '__main__':
    # Load the graph (nodes keyed by their GML "id" field), run the
    # GN algorithm, tag nodes with their community, and export to GML.
    graph = nx.read_gml('data.gml', label='id')
    algorithm = GN(graph)
    algorithm.run()
    algorithm.add_group()
    algorithm.to_gml()
data.gml
graph
[
node
[
id "a"
]
node
[
id "b"
]
edge
[
source "a"
target "b"
]
]
# Serialize the vertex set and edge set as a minimal GML file.
with open('data.gml', 'w') as f:
    f.write("graph\n")
    f.write("[\n")
    # One node record per vertex.
    for v in vertex:
        f.write("node\n")
        f.write("[\n")
        f.write("id \"" + v + "\"\n")
        f.write("]\n")
    # One edge record per (source, target) pair.
    for e in edges:
        f.write("edge\n")
        f.write("[\n")
        f.write("source \"" + e[0] + "\"\n")
        f.write("target \"" + e[1] + "\"\n")
        f.write("]\n")
    f.write("]")
其中vertex为节点集,edges为边集
通过不断删除“边介数与边权之比”最大的边并循环(每次都重新计算),直到图中没有边存在
步骤:
python
# -*- coding: utf-8 -*-
#GN_weight
import networkx as nx
import matplotlib.pyplot as plt
import sys
sys.path.append('../')
class GN_w:
    """Weighted Girvan-Newman community detection.

    Same iterative scheme as plain GN, but the edge removed at each step
    is the one maximizing (edge betweenness / edge weight), where the
    weight is read from the edge's 'value' attribute. The best partition
    is the one with maximum modularity Q on the original graph.
    """

    def __init__(self, G):
        # Untouched copy for modularity evaluation and output;
        # self.G is destructively modified by run().
        self.G_copy = G.copy()
        self.G = G
        # Start with a single community containing every node.
        self.partition = [[n for n in G.nodes()]]
        self.all_Q = [0.0]
        self.max_Q = 0.0

    # Using max_Q to divide communities
    def run(self):
        """Build the full dendrogram; return (partition, all_Q, max_Q)."""
        while self.G.number_of_edges() != 0:
            # Score every edge by betweenness divided by its 'value' weight.
            # NOTE(review): a 'value' of 0 (or a missing 'value' key) would
            # raise here — the expected data.gml format always provides a
            # positive 'value' per edge.
            edges = {}
            edges_betweenness_centrality = nx.edge_betweenness_centrality(self.G)
            for e, ebc in edges_betweenness_centrality.items():
                edges[e] = ebc / self.G.get_edge_data(e[0], e[1])['value']
            # Remove the highest-scoring edge and re-read the components.
            edge = max(edges.items(), key=lambda item: item[1])[0]
            self.G.remove_edge(edge[0], edge[1])
            components = [list(c) for c in nx.connected_components(self.G)]
            if len(components) != len(self.partition):
                # Compute the Q against the ORIGINAL graph.
                cur_Q = self.cal_Q(components, self.G_copy)
                if cur_Q not in self.all_Q:
                    self.all_Q.append(cur_Q)
                if cur_Q > self.max_Q:
                    self.max_Q = cur_Q
                    self.partition = components
        print('-----------the divided communities and the Max Q------------')
        print('The number of Communites:', len(self.partition))
        print('Max_Q:', self.max_Q)
        print(self.partition)
        return self.partition, self.all_Q, self.max_Q

    def group(self, node1):
        """Tag all nodes with their community index and return node1's index.

        Returns None when node1 is not present in any community.
        """
        num1 = 0
        nodegroup1 = {}
        for partition in self.partition:
            for node in partition:
                nodegroup1[node] = {'group': num1}
            num1 = num1 + 1
        nx.set_node_attributes(self.G_copy, nodegroup1)
        for partition in self.partition:
            for node in partition:
                if node == node1:
                    return nodegroup1[node]['group']

    def add_group(self):
        """Attach a 'group' attribute (community index) to each node of the copy."""
        num = 0
        nodegroup = {}
        for partition in self.partition:
            for node in partition:
                nodegroup[node] = {'group': num}
            num = num + 1
        nx.set_node_attributes(self.G_copy, nodegroup)
        return self.partition

    def to_gml(self):
        """Write the annotated original graph to 'outtoGN_weighted.gml'."""
        nx.write_gml(self.G_copy, 'outtoGN_weighted.gml')

    # Computing the Q
    def cal_Q(self, partition, G):
        """Compute the modularity Q = sum_c (e_c - a_c**2) of *partition* on *G*.

        e_c is the fraction of edges with both endpoints inside community c;
        a_c is the fraction of edge endpoints attached to community c.
        """
        m = G.number_of_edges()
        if m == 0:
            # Modularity is undefined for an edgeless graph; return 0.0
            # instead of raising ZeroDivisionError.
            return 0.0
        a = []
        e = []
        for community in partition:
            # a_c: total degree of the community over 2m.
            t = sum(len(list(G.neighbors(node))) for node in community)
            a.append(t / (2 * m))
        for community in partition:
            # e_c: the ordered double loop counts every intra-community edge
            # twice (u->v and v->u), hence the division by 2m.
            t = 0.0
            for u in community:
                for v in community:
                    if G.has_edge(u, v):
                        t += 1.0
            e.append(t / (2 * m))
        return sum(ei - ai ** 2 for ei, ai in zip(e, a))
if __name__ == '__main__':
    # Load the weighted graph, run weighted GN, tag nodes with their
    # community, and export the annotated graph to GML.
    graph = nx.read_gml('data.gml')
    algorithm = GN_w(graph)
    algorithm.run()
    zidian = algorithm.add_group()
    algorithm.to_gml()
data.gml
graph
[
node
[
id 1
label "a"
]
node
[
id 2
label "b"
]
edge
[
source 1
target 2
value 0.6153846153846154
]
]
# GN_weight: serialize vertices (with numeric ids) and weighted edges as GML.
with open('data.gml', 'w') as f:
    f.write("graph\n")
    f.write("[\n")
    # One node record per vertex: numeric id plus the name as label.
    for v in vertex:
        f.write("node\n")
        f.write("[\n")
        f.write("id " + str(vertexxuhao[v]) + "\n")
        f.write("label \"" + v + "\"\n")
        f.write("]\n")
    # One edge record per pair, with its weight in 'value'.
    # NOTE(review): source/target ids are looked up in edgesxuhao, not
    # vertexxuhao — verify the two dictionaries agree on node numbering.
    for e in edges:
        f.write("edge\n")
        f.write("[\n")
        f.write("source " + str(edgesxuhao[e[0]]) + "\n")
        f.write("target " + str(edgesxuhao[e[1]]) + "\n")
        f.write("value " + str(edgesvalue[(e[0], e[1])]) + "\n")
        f.write("]\n")
    f.write("]")
其中vertex、edges分别为节点集和边集,vertexxuhao、edgesxuhao分别为对应序号字典,edgesvalue为每条边对应权值字典