多种综合评价方法的python实现

  综合评价类方法是数学建模中常用的方法,主要作用是对多组数据赋权,算是非常万金油的办法。网上有很多封装好的软件可以实现综合评价,但是欠缺一定的灵活性,在这里我们尝试用python实现综合评价。
  综合评价方法分为主观型综合评价方法和客观型综合评价方法,我们在此尝试两类综合评价方法中的几个典型方法:
多种综合评价方法的python实现_第1张图片
  各种综合评价方法在此不多介绍原理,我们直接放代码实现(水平有限,代码有些冗长):

AHP

  在实现AHP代码前需要先对各个指标做专家评分,层次分析图示如下:
多种综合评价方法的python实现_第2张图片
  代码实现如下:

def ahp(data, rows, columns):
    """Score the columns of ``data`` with weights derived from expert ratings.

    The expert rating table is loaded with ``read_file(feature_score, 0)``.
    Each of its rows is collapsed to the square root of the row product, and
    the results are normalised to sum to one, giving one weight per row of
    ``data``.

    Args:
        data: 2-D numeric array indexed as ``data[i][j]``; row ``i`` is scaled
            by weight ``i``.  NOTE(review): presumably rows are indicators and
            columns are the alternatives being ranked -- confirm with caller.
        rows: Number of leading rows of ``data`` that receive a weight.
        columns: Number of leading columns of ``data`` that are scaled.

    Returns:
        1-D array holding the column sums of the weighted matrix.

    Side effects:
        Appends the weight vector (as a list) to the module-level ``weight``.
    """
    # Expert importance ratings for the indicators live in ``feature_score``.
    f_score = read_file(feature_score, 0)
    f_score = f_score.prod(axis=1)
    # NOTE(review): the geometric-mean method normally takes the n-th root for
    # an n x n judgement matrix; the fixed square root only matches n == 2.
    # Left unchanged because the table's shape is not visible here -- confirm.
    f_score = np.power(f_score, 1/2)
    W = f_score / sum(f_score)          # normalised weights
    row_weights = np.asarray(W, dtype=float)

    # Work on a copy so the caller's matrix is not overwritten in place.
    score = np.array(data, dtype=float)
    score[:rows, :columns] *= row_weights[:rows, np.newaxis]

    weight.append(W.tolist())
    return score.sum(axis=0)

熵权法TOPSIS

  代码如下:

def E_j_fun(data, rows, columns):  # compute the entropy of every column
    """Return the information entropy of each column of ``data``.

    For every cell the share ``P_ij = data[i][j] / column_total[j]`` is formed
    and contributes ``-P_ij * ln(P_ij) / ln(rows)``; cells equal to zero
    contribute nothing.  The contributions are summed down each column.

    Args:
        data: 2-D numeric array.  It is only read, never modified.
        rows: Number of leading rows to include.  Must be at least 2: with
            ``rows == 1`` the ``ln(rows)`` normaliser is zero and the result
            is NaN.
        columns: Number of leading columns to include.

    Returns:
        1-D float array of length ``columns`` with one entropy per column.
    """
    values = np.asarray(data, dtype=float)
    # Column totals do not depend on the cell, so compute them once over the
    # whole input instead of once per cell.
    col_totals = values.sum(axis=0)[:columns]
    block = values[:rows, :columns]
    nonzero = block != 0

    # Zero cells keep a placeholder share of 1 so the logarithm below stays
    # finite; their contribution is forced to 0 afterwards.
    share = np.ones_like(block)
    np.divide(block, col_totals, out=share, where=nonzero)

    terms = (-1 / np.log(rows)) * share * np.log(share)
    E = np.where(nonzero, terms, 0.0)
    return E.sum(axis=0)        # column-wise sum: one entropy per indicator

def topsis(data, rows, columns):        # TOPSIS evaluation with entropy weights
    """Rank the rows of ``data`` by TOPSIS closeness using entropy weights.

    Column weights are ``(1 - E_j) / sum(1 - E_j)`` where ``E_j`` comes from
    ``E_j_fun``.  Each column is scaled by its weight; the per-column maximum
    and minimum of the scaled matrix serve as the best and worst reference
    points, and every row gets ``D_worst / (D_best + D_worst)`` from its
    Euclidean distances to them.

    Args:
        data: 2-D numeric array indexed as ``data[i][j]``.
        rows: Number of leading rows of ``data`` to evaluate.
        columns: Number of leading columns of ``data`` to evaluate.

    Returns:
        1-D float array of length ``rows`` with the closeness of each row.
        A row set where all rows are identical yields NaN (0 / 0).

    Side effects:
        Appends the column weights (as a list) to the module-level ``weight``.
    """
    values = np.asarray(data, dtype=float)[:rows, :columns]

    E_j = np.asarray(E_j_fun(data, rows, columns), dtype=float)  # entropy per column
    G_j = 1 - E_j                       # degree of divergence
    W_j = G_j / G_j.sum()               # normalised weights

    Z_ij = values * W_j                 # weighted matrix
    Imax_j = Z_ij.max(axis=0)           # best reference point
    Imin_j = Z_ij.min(axis=0)           # worst reference point

    Dmax_i = np.sqrt(((Imax_j - Z_ij) ** 2).sum(axis=1))   # distance to best
    Dmin_i = np.sqrt(((Imin_j - Z_ij) ** 2).sum(axis=1))   # distance to worst
    C_i = Dmin_i / (Dmax_i + Dmin_i)    # closeness score

    weight.append(W_j.tolist())
    return C_i

熵权法综合评价

  代码如下:

def E_evalution(data, rows, columns):        # entropy-weight evaluation
    """Score each row of ``data`` as an entropy-weighted sum of its columns.

    Column weights are ``(1 - E_j) / sum(1 - E_j)`` with ``E_j`` supplied by
    ``E_j_fun``, the same weighting ``topsis`` uses, so they sum to one.
    (The earlier denominator ``columns - sum(1 - E_j)`` reduces to
    ``sum(E_j)``, which does not normalise the weights.)

    Args:
        data: 2-D numeric array indexed as ``data[i][j]``.
        rows: Number of leading rows of ``data`` to score.
        columns: Number of leading columns of ``data`` to weight.

    Returns:
        1-D float array of length ``rows`` with the weighted row sums.

    Side effects:
        Appends the column weights (as a list) to the module-level ``weight``.
    """
    values = np.asarray(data, dtype=float)[:rows, :columns]

    E_j = np.asarray(E_j_fun(data, rows, columns), dtype=float)  # entropy per column
    G_j = 1 - E_j                   # degree of divergence
    W_j = G_j / G_j.sum()           # normalised weights

    ret = (values * W_j).sum(axis=1)
    weight.append(W_j.tolist())
    return ret

CRITIC

  代码如下:

def critic(data, rows, columns):
    """Score the columns of ``data`` with CRITIC weights computed per row.

    Each row of ``data`` is treated as one indicator.  Its weight is
    proportional to ``std_i * sum_k(1 - r_ik)``, where ``std_i`` is the sample
    standard deviation (``ddof=1``) of row ``i`` and ``r_ik`` the Pearson
    correlation between rows ``i`` and ``k``.  Using ``1 - r`` means an
    indicator that conflicts with the others gains weight rather than loses it.

    Args:
        data: 2-D numeric array indexed as ``data[i][j]``.  A constant row
            has zero deviation and an undefined correlation, which propagates
            NaN into the result.
        rows: Number of leading *columns* of ``data`` to score (the second
            index ``j``).
        columns: Number of leading *rows* of ``data`` to weight (the first
            index ``i``).

    Returns:
        1-D float array of length ``rows`` with the weighted column sums.

    Side effects:
        Appends the row weights (as a list) to the module-level ``weight``.
    """
    values = np.asarray(data, dtype=float)

    data_std = np.std(values, axis=1, ddof=1)       # sample standard deviation
    data_rela = np.corrcoef(values)                 # row-by-row correlations
    conflict = (1 - data_rela).sum(axis=1)          # disagreement with the others

    C_i = conflict * data_std                       # element-wise product
    W_i = C_i / C_i.sum()

    ret = (values[:columns, :rows] * W_i[:columns, np.newaxis]).sum(axis=0)
    weight.append(W_i.tolist())
    return ret

因子分析

  代码如下:

def factor(data, columns, rows):
    """Compute a composite factor-analysis score for every row of ``data``.

    A ``FactorAnalyzer`` with ``rows`` factors (principal method, varimax
    rotation) is fitted to ``data``.  Score coefficients are obtained as the
    inverse correlation matrix times the rotated loadings; the per-row factor
    scores are then combined using each factor's proportional variance.

    Plain ndarrays replace the former ``np.mat`` / ``np.matrix`` objects
    (``np.mat`` no longer exists in NumPy 2.0).  The model is fitted once:
    the earlier second fit used the same settings apart from the imputation
    strategy, which only matters for missing values, and those already make
    the correlation matrix NaN.

    Args:
        data: 2-D numeric table exposing ``.T``; ``np.corrcoef(data.T)``
            treats its columns as the variables.
        columns: Unused; kept so existing call sites keep working.
        rows: Number of factors to extract.

    Returns:
        1-D array with one composite score per row of ``data``.

    Side effects:
        Prints the KMO statistic and the Bartlett test result, and appends
        the normalised proportional variances to the module-level ``weight``.
    """
    df2_corr = np.corrcoef(data.T)  # Pearson correlation between variables
    kmo = calculate_kmo(data)  # KMO should exceed 0.7
    bartlett = calculate_bartlett_sphericity(data)  # Bartlett p-value should be < 0.05
    print('kmo:{},bartlett:{}'.format(kmo[1], bartlett))

    # Rotate the loading matrix with varimax.
    fa = FactorAnalyzer(n_factors=rows, rotation='varimax', method='principal', impute='mean')
    fa.fit(data)
    fa_sd = fa.get_factor_variance()        # fa_sd[1] holds the proportional variance
    contribution = np.asarray(fa_sd[1], dtype=float)

    # Score coefficients (regression method): inverse correlation matrix
    # multiplied by the rotated loadings.
    factor_score = np.dot(nlg.inv(np.asarray(df2_corr)), fa.loadings_)
    # Factor scores of every sample.
    fa_t_score = np.dot(np.asarray(data, dtype=float), factor_score)
    # Composite score: factor scores weighted by proportional variance.
    composite = np.dot(fa_t_score, contribution) / sum(contribution)

    weight.append((contribution / sum(contribution)).tolist())
    return composite

主函数

  代码如下:

def main():
    """Run every evaluation method on the input table and stack the scores.

    The table is loaded with ``read_file(path, 1)``, entries 6 through 12 are
    rescaled to ``1 - x / max(x)``, and the result is passed through
    ``Normalization`` before being handed to the individual methods.

    Returns:
        The ``np.dstack`` of the AHP, TOPSIS, entropy, CRITIC and factor
        scores, in that order.  Earlier versions built this array and then
        discarded it.
    """
    data = read_file(path, 1)      # load the spreadsheet
    # NOTE(review): presumably entries 6..12 are cost-type indicators being
    # flipped so that larger is better -- confirm against the input layout.
    for i in range(6, 13):
        data[i] = 1 - data[i] / data[i].max()
    Standard_data = Normalization(data)      # min-max normalisation
    rows = Standard_data.shape[0]
    columns = Standard_data.shape[1]
    transposed = Standard_data.T

    # Objective methods.
    ret_topsis = topsis(transposed, columns, rows)
    ret_E = E_evalution(transposed, columns, rows)
    ret_critic = critic(Standard_data, columns, rows)
    ret_factor = factor(transposed, columns, rows)

    # Subjective method.  Keep AHP last: it receives the untransposed matrix
    # directly, so running it after the others guards them against any
    # in-place modification of Standard_data.
    ret_ahp = ahp(Standard_data, rows, columns)

    # Combine all score vectors.
    result = np.dstack((ret_ahp, ret_topsis, ret_E, ret_critic, ret_factor))
    return result


if __name__ == '__main__':
    main()

结果可视化

多种综合评价方法的python实现_第3张图片
  可以看出AHP效果不佳,分析发现主要原因是专家评分不合理,我们后期还可以继续修正。
  图中的Subjective,Objective,Final Score是联合模型的结果,联合方法见此博客:https://blog.csdn.net/Hjh1906008151/article/details/123431230

你可能感兴趣的:(数学建模,数据挖掘,python,数据分析)