参考链接:http://blog.sina.com.cn/s/blog_710e9b550101aqnv.html
实战例子:
某医院为了提高自身的护理水平,对拥有的11个科室进行了考核,考核标准包括9项整体护理指标(X1~X9),并对护理水平较好的科室进行奖励。下表是各个科室各项指标考核后的评分结果。
数据源
科室 X1 X2 X3 X4 X5 X6 X7 X8 X9
A 100 90 100 84 90 100 100 100 100
B 100 100 78.6 100 90 100 100 100 100
C 75 100 85.7 100 90 100 100 100 100
D 100 100 78.6 100 90 100 94.4 100 100
E 100 90 100 100 100 90 100 100 80
F 100 100 100 100 90 100 100 85.7 100
G 100 100 78.6 100 90 100 55.6 100 100
H 87.5 100 85.7 100 100 100 100 100 100
I 100 100 92.9 100 80 100 100 100 100
J 100 90 100 100 100 100 100 100 100
K 100 100 92.9 100 90 100 100 100 100
python 代码实现:
# -*- encoding=utf-8 -*-
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
def get_score(wi_list, data):
    """
    Compute the weighted composite score of every sample (row).

    :param wi_list: indicator weights, one per column of ``data``
    :param data: indicator values, one row per sample and one column per
        indicator (DataFrame or any 2-D array-like)
    :return: list with one weighted-sum score per row of ``data``
    :raises ValueError: if the data is not 2-D or the number of weights
        does not match the number of indicator columns

    The two arguments are also accepted in swapped order
    (``get_score(data, wi_list)``) because the script in this file calls
    the function that way: whichever argument is 2-D is used as the data.
    The scores are returned as-is; no rescaling is applied.
    """
    weights = np.asarray(wi_list, dtype=float)
    values = np.asarray(data, dtype=float)

    # Tolerate the legacy (data, weights) call order.
    if weights.ndim > values.ndim:
        weights, values = values, weights

    weights = weights.ravel()
    if values.ndim != 2 or values.shape[1] != weights.size:
        raise ValueError(
            "data must be 2-D with one column per weight "
            "(got data shape %s and %d weights)" % (values.shape, weights.size)
        )

    # (n_samples x n_indicators) @ (n_indicators,) -> one score per sample.
    return (values @ weights).tolist()
def get_entropy_weight(data):
    """
    Derive indicator weights with the entropy weight method.

    Each column is min-max normalised, turned into a distribution over the
    samples, and its information entropy is computed; the weight of a column
    is its share of the total ``1 - entropy``.

    :param data: indicator values, one row per sample and one column per
        indicator (DataFrame or any 2-D array-like)
    :return: list of weights, one per column, summing to 1
    :raises ValueError: if ``data`` is not 2-D or has fewer than two rows
        (the entropy constant ``1 / ln(rows)`` is undefined for one row)

    A constant column carries no information and receives weight 0. If every
    column is constant, the weights are split equally.
    """
    values = np.asarray(data, dtype=float)
    if values.ndim != 2:
        raise ValueError("data must be two-dimensional (samples x indicators)")
    m, n = values.shape
    if n == 0:
        return []
    if m < 2:
        raise ValueError("at least two samples are required to compute entropy")

    col_min = values.min(axis=0)
    span = values.max(axis=0) - col_min
    informative = span > 0

    # Min-max normalisation; constant columns are left at zero instead of 0/0.
    scaled = np.zeros_like(values)
    scaled[:, informative] = (
        (values[:, informative] - col_min[informative]) / span[informative]
    )

    # Share of each sample within its column (p_ij).
    pij = np.zeros_like(values)
    pij[:, informative] = scaled[:, informative] / scaled[:, informative].sum(axis=0)

    # Use the convention 0 * log(0) = 0 by only taking the log where p_ij > 0.
    log_p = np.zeros_like(pij)
    np.log(pij, out=log_p, where=pij > 0)

    k = 1 / np.log(m)
    ej = -k * (pij * log_p).sum(axis=0)
    # A constant column is treated as maximally uncertain (entropy 1).
    ej[~informative] = 1.0

    # Clip guards against tiny negative values caused by rounding.
    divergence = np.clip(1 - ej, 0, None)
    total = divergence.sum()
    if total == 0:
        return [1.0 / n] * n
    return (divergence / total).tolist()
if __name__ == '__main__':
    # Load the evaluation sheet. No ``encoding`` argument is passed because
    # read_excel does not take one in current pandas.
    data0 = pd.read_excel("C:\\Users\\xiaohu\\Desktop\\文本挖掘\\文本质量综合评价算法\\test2.xlsx")

    # Columns 1..9 are taken as the indicator columns (X1..X9 in the sample
    # table). Copy them so that adding columns below does not write into a
    # slice of data0.
    mm = data0.iloc[:, 1:10].copy()

    # Weights are computed before any extra column is attached to mm.
    wi_list = get_entropy_weight(mm)

    # NOTE: the data is passed first and the weights second; keep this order,
    # it is the orientation in which the call returns one score per row.
    score_list = get_score(mm, wi_list)
    mm['score'] = score_list
    mm['科室'] = data0['科室']

    # Sort the departments by score, highest first, and number them.
    result = mm.sort_values(by='score', axis=0, ascending=False)
    result['rank'] = range(1, len(result) + 1)
    print(result)

    # Write the ranking out as CSV.
    result.to_csv('C:\\Users\\xiaohu\\Desktop\\文本挖掘\\文本质量综合评价算法\\test2_result.csv', index=False)
数据结果:
X1 X2 X3 X4 X5 X6 X7 X8 X9 score 科室 rank
5 100.0 100 100.0 100 90 100 100.0 85.7 100 98.010056 F 1
9 100.0 90 100.0 100 100 100 100.0 100.0 100 97.808413 J 2
10 100.0 100 92.9 100 90 100 100.0 100.0 100 97.021269 K 3
8 100.0 100 92.9 100 80 100 100.0 100.0 100 95.969292 I 4
4 100.0 90 100.0 100 100 90 100.0 100.0 80 95.840649 E 5
0 100.0 90 100.0 84 90 100 100.0 100.0 100 95.706962 A 6
7 87.5 100 85.7 100 100 100 100.0 100.0 100 95.172035 H 7
2 75.0 100 85.7 100 90 100 100.0 100.0 100 93.172738 C 8
1 100.0 100 78.6 100 90 100 100.0 100.0 100 93.140624 B 9
3 100.0 100 78.6 100 90 100 94.4 100.0 100 92.770375 D 10
6 100.0 100 78.6 100 90 100 55.6 100.0 100 90.205085 G 11
Process finished with exit code 0
补充:如果有的指标是正向指标(数值越大越好),有的指标是负向指标(数值越小越好),又该如何处理呢?
# -*- encoding=utf-8 -*-
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
# 归一化函数
def guiyi(data, type, ymin, ymax):
    """
    Min-max normalise every indicator column into the interval [ymin, ymax].

    :param data: raw values, one row per sample and one column per indicator
        (DataFrame or any 2-D array-like)
    :param type: one entry per column; 1 marks a positive indicator (larger
        raw value -> larger result), any other value marks a negative
        indicator (larger raw value -> smaller result)
    :param ymin: lower end of the target interval
    :param ymax: upper end of the target interval
    :return: numpy array of the same shape as ``data`` with the normalised
        values
    :raises ValueError: if ``data`` is not 2-D or ``type`` does not have one
        entry per column

    A column whose values are all equal is mapped to ``ymin``.
    """
    x = np.asarray(data, dtype=float)
    if x.ndim != 2:
        raise ValueError("data must be two-dimensional (samples x indicators)")
    if len(type) != x.shape[1]:
        raise ValueError("type must contain one entry per indicator column")

    xmin = x.min(axis=0)
    xmax = x.max(axis=0)
    span = xmax - xmin
    # Avoid 0/0 on constant columns; their numerator is 0 anyway.
    safe_span = np.where(span > 0, span, 1.0)

    # Distance from the worst value: from the minimum for positive
    # indicators, from the maximum for negative ones.
    positive = np.asarray(type) == 1
    distance = np.where(positive, x - xmin, xmax - x)

    # ymin is an offset added after scaling, not part of the denominator.
    return (ymax - ymin) * distance / safe_span + ymin
# 计算指标权重函数
def get_entropy_weight(y):
    """
    Derive indicator weights from a normalised matrix by the entropy weight
    method.

    :param y: normalised matrix, one row per sample and one column per
        indicator; values are expected to be non-negative
    :return: list of weights, one per column, summing to 1
    :raises ValueError: if ``y`` is not 2-D or has fewer than two rows
        (the entropy constant ``1 / ln(rows)`` is undefined for one row)

    A column that is uniform across samples, or whose sum is zero, carries no
    information and receives weight 0. If no column carries information, the
    weights are split equally.
    """
    values = np.asarray(y, dtype=float)
    if values.ndim != 2:
        raise ValueError("y must be two-dimensional (samples x indicators)")
    n, m = values.shape
    if m == 0:
        return []
    if n < 2:
        raise ValueError("at least two samples are required to compute entropy")

    totals = values.sum(axis=0)
    usable = totals > 0

    # Share of each sample within its column (p_ij); zero-sum columns stay 0.
    pij = np.zeros_like(values)
    pij[:, usable] = values[:, usable] / totals[usable]

    # Use the convention 0 * log(0) = 0 by only taking the log where p_ij > 0.
    log_p = np.zeros_like(pij)
    np.log(pij, out=log_p, where=pij > 0)

    # The entropy constant depends on the number of samples (rows), so that
    # a uniform column has entropy exactly 1.
    k = 1 / np.log(n)
    ej = -k * (pij * log_p).sum(axis=0)
    ej[~usable] = 1.0

    # Clip guards against tiny negative values caused by rounding.
    divergence = np.clip(1 - ej, 0, None)
    total = divergence.sum()
    if total == 0:
        return [1.0 / m] * m
    return (divergence / total).tolist()
# 计算综合评分
def get_score(wi_list, y):
    """
    Compute the weighted composite score of every sample, scaled by 100.

    :param wi_list: indicator weights, one per column of ``y``
    :param y: normalised matrix, one row per sample and one column per
        indicator
    :return: DataFrame with a single column (label 0) holding, for each row
        of ``y``, the weighted sum of its indicators multiplied by 100
    :raises ValueError: if ``y`` is not 2-D or the number of weights does not
        match the number of columns
    """
    weights = np.asarray(wi_list, dtype=float).ravel()
    values = np.asarray(y, dtype=float)
    if values.ndim != 2 or values.shape[1] != weights.size:
        raise ValueError(
            "y must be 2-D with one column per weight "
            "(got y shape %s and %d weights)" % (values.shape, weights.size)
        )

    # (n_samples x n_indicators) @ (n_indicators,) -> one score per sample.
    scores = (values @ weights) * 100

    # Callers read the scores back with .iloc[:, 0], so keep a one-column frame.
    return pd.DataFrame(scores.reshape(-1, 1))
if __name__ == '__main__':
    # Small demo data set: five samples, three indicators.
    data = pd.DataFrame({'x1': [871, 150, 4820, 15, 180],
                         'x2': [200, 800, 141, 180, 180],
                         'x3': [700, 500, 812, 5, 48]})
    print(data)

    # One entry per indicator: 1 = positive, 2 = negative. Here all three
    # are negative. Named so that the builtin ``type`` is not shadowed.
    indicator_types = [2, 2, 2]
    ymin = 0.002
    ymax = 0.996

    # Step 1: normalise the indicators.
    y = guiyi(data, indicator_types, ymin, ymax)
    print(y)

    # Step 2: derive the entropy weights.
    wi_list = get_entropy_weight(y)
    print(wi_list)

    # Step 3: weighted composite score per sample.
    last_hot_score = get_score(wi_list, y)
    print(last_hot_score)
    data['score'] = last_hot_score.iloc[:, 0]

    # Sort by score, highest first, and attach the rank.
    result = data.sort_values(by='score', axis=0, ascending=False)
    result['rank'] = range(1, len(result) + 1)
    print(result)