# ******** 社团发现 ********
# 目标:找带权重的社团
# 选型:networkx 的社团发现算法很少,igraph 的算法多,Spark GraphX 能找到高效的实现方法。
# igraph:找到社团并提取结果信息
# Install igraph only when it is missing, so that re-running the script does
# not reinstall the package every time.
if (!requireNamespace("igraph", quietly = TRUE)) {
  install.packages("igraph")
}
library(igraph)
# Toy graph: three 5-vertex full graphs joined with %du% (disjoint union of
# graphs), i.e. three obvious communities. It is so simple that the result
# will have only one level.
g <- make_full_graph(5) %du% make_full_graph(5) %du% make_full_graph(5)
# Add the inter-community edges 1-6, 1-11 and 6-11.
g <- add_edges(g, c(1, 6, 1, 11, 6, 11))
communities <- cluster_louvain(g)
# Extract results from the detected communities.
cs <- communities
membership(cs)       # community each vertex belongs to
print(cs)            # communities and their members
modularity(cs)
length(cs)           # number of communities
sizes(cs)            # number of members in each community
algorithm(cs)        # name of the algorithm that was used
is_hierarchical(cs)  # whether the algorithm is hierarchical
plot(cs, g)          # draw the graph with vertices grouped by community
# Another data set and another method: walktrap on the built-in "Zachary"
# graph.
karate <- igraph::make_graph("Zachary")
wc <- igraph::cluster_walktrap(graph = karate)
igraph::modularity(wc)
igraph::membership(wc)
plot(x = wc, y = karate)
# 参考:
# 1. http://igraph.org/r/doc/cluster_louvain.html louvain社团发现算法
# 2. http://igraph.org/r/doc/communities.html 从发现的社团中抽取信息
# 3. http://jfaganuk.github.io/2015/01/24/basic-network-analysis/ 关于在igraph中用SNA包的flow betweenness功能
# ******** 社团中找角色 ********
#0. 总论:
# 应用场景:社交网络(网络上有影响力的人),权力中心(小团体中的特殊角色),(关键基础设施节点)交通网络节点(找到全国范围瓶颈节点),互联网(数据中心网络),城市网络(城市拥堵点),疾病网络(找到发病源)。
# 中心性度量:起源于社会学,最初用于社会网络分析,后来才用于社交网络。
# 节点影响力度量(Node influence metric):每个节点都要计算。
# 中心度概念参考:https://en.wikipedia.org/wiki/Centrality
# 1. Find brokers (vertex/edge betweenness centrality): one community may
#    contain several brokers, and brokers are unevenly distributed across
#    communities.
karate <- make_graph("Zachary")
cs <- cluster_louvain(karate)
scores <- betweenness(karate)
# Combine two vectors into a two-column matrix: column 1 is the vertex index
# in the graph, column 2 is that vertex's betweenness value.
newarr <- cbind(seq_along(scores), scores)
# Sort the rows by column 2 in descending order; the vertex index in column 1
# moves together with its row. drop = FALSE keeps a matrix even for one row.
newarr <- newarr[order(newarr[, 2], decreasing = TRUE), , drop = FALSE]
# Take the vertices with the largest betweenness as brokers; the number taken
# equals the number of detected communities (capped at the vertex count).
top_ids <- newarr[seq_len(min(length(cs), nrow(newarr))), 1]
V(karate)$shape <- "circle"
V(karate)[top_ids]$shape <- "square"
# Original author's note: in the community plot the size can be changed, but
# other attributes did not seem to take effect.
plot(cs, karate)
# 2. Find the community "prophets" / sensors (closeness centrality).
karate <- make_graph("Zachary")
cs <- cluster_louvain(karate)
closes <- closeness(karate)
# Column 1: vertex index, column 2: closeness; rows sorted by closeness,
# descending. drop = FALSE keeps a matrix even for one row.
newarr <- cbind(seq_along(closes), closes)
newarr <- newarr[order(newarr[, 2], decreasing = TRUE), , drop = FALSE]
# Pick as many top-ranked vertices as there are detected communities.
top_ids <- newarr[seq_len(min(length(cs), nrow(newarr))), 1]
V(karate)$shape <- "circle"
V(karate)[top_ids]$shape <- "square"
plot(cs, karate)
# Highlight the same vertices by colour instead of shape. The original
# version had no visible effect: plot() on a communities object takes the
# vertex colours from its `col` argument (default: the membership vector),
# so the colour attribute set on the graph was not used, and both the base
# and the highlight colour were "red". Pass the colours through `col`.
V(karate)$color <- "green"
V(karate)[top_ids]$color <- "red"
plot(cs, karate, col = V(karate)$color)
# Read a graph from a GML file and mark its top-closeness vertices.
library(igraph)
coauthor <- read_graph("/root/netscience.gml", format = "gml")
cs <- cluster_louvain(coauthor)
closes <- closeness(coauthor)
# Column 1: vertex index, column 2: closeness; rows sorted by closeness,
# descending. drop = FALSE keeps a matrix even for one row.
newarr <- cbind(seq_along(closes), closes)
newarr <- newarr[order(newarr[, 2], decreasing = TRUE), , drop = FALSE]
# Pick as many top-ranked vertices as there are detected communities.
top_ids <- newarr[seq_len(min(length(cs), nrow(newarr))), 1]
V(coauthor)$color <- "green"
V(coauthor)[top_ids]$color <- "red"
# First plot: the plain graph, drawn with the green/red vertex colours.
plot(
  coauthor,
  layout = layout_with_graphopt, vertex.size = 1, vertex.label = ""
)
# Second plot: the same graph drawn through the detected communities.
plot(
  cs, coauthor,
  layout = layout_with_graphopt, vertex.size = 1, vertex.label = ""
)
# 3. Find the vertices carrying the most flow (flow betweenness).
#    Original note: this needs a weighted graph.
# Rebuild the graph here so this section does not depend on an earlier one,
# as every other section does.
karate <- make_graph("Zachary")
# Dense adjacency matrix of the graph, passed to sna::flowbet() below.
# (The original line was missing its closing parenthesis.)
mat <- as_adjacency_matrix(karate, sparse = FALSE)
flowbts <- sna::flowbet(mat)
# 4. Find the most important vertices / drivers (eigenvector centrality;
#    PageRank is a variant of eigenvector centrality).
karate <- make_graph("Zachary")
cs <- cluster_louvain(karate)
ec <- eigen_centrality(karate)
# Column 1: vertex index, column 2: the centrality score from ec$vector;
# rows sorted by score, descending. drop = FALSE keeps a matrix even for
# one row.
newarr <- cbind(seq_along(ec$vector), ec$vector)
newarr <- newarr[order(newarr[, 2], decreasing = TRUE), , drop = FALSE]
# Pick as many top-ranked vertices as there are detected communities.
top_ids <- newarr[seq_len(min(length(cs), nrow(newarr))), 1]
V(karate)$shape <- "circle"
V(karate)[top_ids]$shape <- "square"
plot(cs, karate)
# 参考:
# 1. http://igraph.org/r/doc/eigen_centrality.html
# 2. https://en.wikipedia.org/wiki/Eigenvector_centrality
# 3. http://cos.name/cn/topic/107575/
# 4. http://www.cnblogs.com/zidiancao/p/3937120.html