# Copyright (c) 2018, 东北大学软件学院学生
# All rights reserved
# 文件名称:nmi.py
# 作 者:孔云
#问题描述:计算算法聚类结果与ground truth的相似度
# coding:utf-8
#A是标准聚类结果,B是算法聚类结果
import ast

import numpy as np
from sklearn import metrics
def open_file(file):
    """Load a label sequence stored as a Python literal in a text file.

    Each non-blank line of ``file`` is parsed as a Python literal (for
    example ``[0, 0, 1, 2]``) and echoed to stdout with a ``temp`` prefix.
    The value parsed from the last non-blank line is the one returned.

    NOTE(review): values parsed from earlier lines are discarded, so the
    file is presumably meant to hold all labels on one line -- confirm
    against the tool that writes these files.

    Args:
        file: Path of the text file to read.

    Returns:
        numpy.ndarray: ``np.array`` of the last parsed literal, or an empty
        array when the file contains no non-blank lines.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line is not a plain Python literal.
        SyntaxError: If a non-blank line is not valid Python syntax.
    """
    temp = []
    with open(file, 'r') as f:
        # Stream the file line by line instead of loading it all first.
        for row in f:
            text = row.strip()
            if not text:
                # Blank lines (typically a trailing one) carry no data;
                # evaluating them would raise SyntaxError.
                continue
            # SECURITY: this used to be eval(), which executes arbitrary
            # expressions found in the data file. literal_eval accepts only
            # literals (numbers, strings, lists, tuples, dicts, sets, ...).
            temp = ast.literal_eval(text)
            print("temp", temp)
    return np.array(temp)
if __name__ == '__main__':
    # Script entry point: compare two label files with NMI.
    # Per the file header, A is the reference (ground-truth) clustering and
    # B is the clustering produced by the algorithm under evaluation.
    file1, file2 = 're1.txt', 're2.txt'
    A, B = open_file(file1), open_file(file2)
    print("A", A)
    print("B", B)

    # Normalized mutual information between the two labelings.
    result_NMI = metrics.normalized_mutual_info_score(
        labels_true=A,
        labels_pred=B,
    )
    print("result_NMI:", result_NMI)
# NMI(归一化互信息)衡量聚类结果与数据集真实情况(ground truth)的相似度,取值范围为 0 到 1;值越大,说明聚类结果与真实情况越接近,聚类效果越好。
# 相关论文介绍:http://www.neusncp.com/static/upload/article/201807242106289781.pdf