K-shell分解法,O(N)
step 1 查找网络中所有度为1的节点,并将这些节点及连接的边去掉。
step 2 经过步骤1后,网络中可能会出现新的度为1的节点,循环执行步骤1,直至所剩的网络中没有度为1的节点为止。
step 3 去掉网络中剩余度为2的节点,一次类推,重复循环执行。
step 4 重复以上操作,直至网络中没有节点为止。
代码实现:
library(dplyr)
library(RMySQL) # 载入RMySQL包
conn <- dbConnect(MySQL(),dbname = "NETWORKS",username = "root") # 建立数据库连接
dbSendQuery(conn, "SET @@sql_mode=ANSI;") # 启动非严格模式
dbSendQuery(conn, "SET NAMES GBK") # win7环境下如果汉字乱码,就运行这条命令
res <- dbSendQuery(conn, "SELECT nodeID,Weight FROM NODEWeight1 ORDER BY nodeID")
dat <- dbFetch(res, n=-1) #n=-1表示取所有数据,n=2表示取2条数据
res1 <- dbSendQuery(conn, "SELECT nodeID1,nodeID2 FROM NODEMap1 ORDER BY nodeID2")
dat1 <- dbFetch(res1, n=-1) #n=-1表示取所有数据,n=2表示取2条数据
dbDisconnect(conn)
output <- data.frame(nodeID = 0,seq = 0) #建立空的输出序列
output <- output[-1,] #清空数据
node_left <- nrow(dat)
node_cnt <- 0
for (i in 1:2000)
{
min_weight <- min(dat$Weight) #找出权重表最小的权重
out_nodes <- dat[dat$Weight==min_weight,] #找出权重最小的点
node_left <- node_left-nrow(out_nodes)
node_cnt <- node_cnt+nrow(out_nodes)
output <- rbind(cbind(nodeID = out_nodes$nodeID,seq = i) , output)
dat <- dat[-which(dat$Weight==min_weight),] #删除权重最小的点
if (node_cnt>2000) #输出结点
{
#屏幕打印
print(Sys.time())
cat("i:",i,"\tmin_weight:",min_weight,"\tout_nodes:",nrow(out_nodes),"\tnode_cnt:",node_cnt,"\tnode_left:",node_left,"\n")
node_cnt <- 0
# file.path <- paste("E:/.../方案四/output_20170706_",i,".csv",sep="")
# write.table(output,file.path, col.names=T,row.names = F, quote = F, sep=",")
}
if (node_left<=0)
{
file.path <- paste("E:/.../方案四/output_20170706_",1,".csv",sep="")
write.table(output,file.path, col.names=T,row.names = F, quote = F, sep=",")
break #如果权重表长度为0,退出
}
del_weight <- inner_join(dat1,out_nodes,c("nodeID2" = "nodeID")) #INNER JOIN找出最小点关联的结点集
planes <- group_by(del_weight, nodeID1) #将3中的结果集group by-count
del_weight <- summarise(planes, Weight1 = n() )
dat <- left_join(dat,del_weight,c("nodeID" = "nodeID1")) #将当前权重表和4中的结果LEFT JOIN
dat[is.na(dat)] <- 0 ### NA置0
dat$Weight <- dat$Weight-dat$Weight1
dat <- dat[,c(1,2)] #得出新的权重表
}
合并多个模型结果:
for (i in 1:8)
{
file.path <- paste("E:/.../方案四/output_20170706_",i,".csv",sep="")
result <- read.csv(file.path,header=T,stringsAsFactors=FALSE)
result <- matrix(result$nodeID,nr=500)
result <- t(result)
if (i==1) output <- cbind(NetID="model1" ,result )
if (i==2) output <- rbind(output, cbind(NetID="model2" ,result) )
if (i==3) output <- rbind(output, cbind(NetID="model3" ,result) )
if (i==4) output <- rbind(output, cbind(NetID="model4" ,result) )
if (i==5) output <- rbind(output, cbind(NetID="real1" ,result) )
if (i==6) output <- rbind(output, cbind(NetID="real2" ,result) )
if (i==7) output <- rbind(output, cbind(NetID="real3" ,result) )
if (i==8) output <- rbind(output, cbind(NetID="real4" ,result) )
}
file.path <- paste("E:/.../方案四/output_20170706_output.csv",sep="")
output[is.na(output)] <- ""
write.table(output,file.path, col.names=T,row.names = F, quote = F, sep=",")
最终平台计算得分:2.03404