2021-02-22 Work Summary: Entropy Weight Method + AHP Scoring Code

A recurring task in business work is scoring a set of objects, and the key step is assigning weights to their different attributes. To balance data-driven objectivity with subjective requirements, weights are usually derived with the entropy weight method combined with AHP (the analytic hierarchy process, i.e. expert scoring), and the objects are then scored with those weights.

The approach is to first run one round of the entropy weight method, which requires specifying the positive and negative indicators as well as the object (name) column.
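For reference, these are the textbook entropy weight formulas that the code below follows, with two small tweaks visible in the code: zero shares are replaced by 1e-6 before taking logarithms, and the denominator uses \ln(n+1) rather than \ln n. For n objects and indicator j:

x'_{ij} = \frac{x_{ij} - \min_i x_{ij}}{\max_i x_{ij} - \min_i x_{ij}} \quad (\text{positive indicators; negative ones use } \max_i x_{ij} - x_{ij} \text{ in the numerator})

p_{ij} = \frac{x'_{ij}}{\sum_{i=1}^{n} x'_{ij}}, \qquad e_j = -\frac{1}{\ln n}\sum_{i=1}^{n} p_{ij}\ln p_{ij}, \qquad d_j = 1 - e_j, \qquad w_j = \frac{d_j}{\sum_j d_j}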

On top of that, the weights of each layer and of individual indicators can be adjusted to match practical needs. This is mainly implemented in the calc_Weight function.
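The f1–f6 layer factors in calc_Weight below are all left at 1; in practice they would come from the AHP / expert-scoring side. A minimal sketch, assuming a hypothetical 6x6 pairwise comparison matrix (the matrix entries below are illustrative, not from the original analysis), of how layer weights could be derived with the principal-eigenvector method:

import numpy as np

# Hypothetical pairwise comparison matrix for the six layers
# (population, transportation, urban quality, talent base, industry, ecology);
# entry A[i, j] states how much more important layer i is than layer j
A = np.array([
    [1,   3,   3,   2,   1,   4],
    [1/3, 1,   1,   1/2, 1/3, 2],
    [1/3, 1,   1,   1/2, 1/3, 2],
    [1/2, 2,   2,   1,   1/2, 3],
    [1,   3,   3,   2,   1,   4],
    [1/4, 1/2, 1/2, 1/3, 1/4, 1],
])

# Principal eigenvector of A gives the AHP layer weights
eigvals, eigvecs = np.linalg.eig(A)
k = np.argmax(eigvals.real)
w = np.abs(eigvecs[:, k].real)
w = w / w.sum()

# Consistency ratio CR = CI / RI, with CI = (lambda_max - n) / (n - 1) and RI = 1.24 for n = 6;
# CR < 0.1 is usually considered acceptable
n = A.shape[0]
cr = ((eigvals[k].real - n) / (n - 1)) / 1.24
print('layer weights:', w.round(3), ' CR:', round(cr, 3))

The resulting weights could then be plugged into f1_weight ... f6_weight (for example scaled so that the factors average to 1); that mapping is one possible convention, not part of the original code.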

The code is as follows:

import numpy as np
import pandas as pd

class EmtropyMethod:
    def __init__(self, index, positive, negative, city):
        # index: DataFrame of indicator values; positive / negative: lists of positive
        # and negative indicator column names; city: Series of object (row) names
        if len(index) != len(city):
            raise Exception('The number of data rows does not match the number of row names')
        if sorted(index.columns) != sorted(positive + negative):
            raise Exception('The positive and negative indicators together do not match the data columns')

        self.index = index.copy().astype('float64')
        self.positive = positive
        self.negative = negative
        self.city = city.copy()

    def uniform(self):
        # Min-max normalisation: positive indicators use (x - min) / (max - min),
        # negative indicators use (max - x) / (max - min)
        uniform_mat = self.index.copy()
        for column in uniform_mat.columns:
            col_min, col_max = uniform_mat[column].min(), uniform_mat[column].max()
            if column in self.positive:
                uniform_mat[column] = (uniform_mat[column] - col_min) / (col_max - col_min)
            else:
                uniform_mat[column] = (col_max - uniform_mat[column]) / (col_max - col_min)
        self.uniform_mat = uniform_mat
        return self.uniform_mat

    def calc_probability(self):
        # Share of each object within each indicator column; zero shares are
        # replaced with 1e-6 so that log(p) is defined in the entropy step
        try:
            p_mat = self.uniform_mat.copy()
        except AttributeError:
            raise Exception('Normalisation has not been done yet; call uniform first')
        for column in p_mat.columns:
            col_sum = p_mat[column].sum()
            p_mat[column] = p_mat[column].apply(lambda x_ij: x_ij / col_sum if x_ij / col_sum != 0 else 1e-6)

        self.p_mat = p_mat
        return p_mat

    def calc_emtropy(self):
        try:
            self.p_mat.head(0)
        except AttributeError:
            raise Exception('Probabilities have not been computed yet; call calc_probability first')

        # e_j = -(1 / ln(n + 1)) * sum_i p_ij * ln(p_ij), where n is the number of objects
        e_j = -(1 / np.log(len(self.p_mat) + 1)) * np.array(
            [sum(pij * np.log(pij) for pij in self.p_mat[column]) for column in self.p_mat.columns])
        ejs = pd.Series(e_j, index=self.p_mat.columns, name='indicator entropy')

        self.emtropy_series = ejs
        return self.emtropy_series

    def calc_emtropy_redundancy(self):
        try:
            # d_j = 1 - e_j: degree of divergence of each indicator
            self.d_series = 1 - self.emtropy_series
            self.d_series.name = 'entropy redundancy'
        except AttributeError:
            raise Exception('Entropy has not been computed yet; call calc_emtropy first')

        return self.d_series

    def calc_Weight(self):
        self.uniform()
        self.calc_probability()
        self.calc_emtropy()
        self.calc_emtropy_redundancy()
        self.Weight = self.d_series / sum(self.d_series)
        self.Weight.name = 'weight'
        '''
        [Overall adjustment: per-layer scaling factors]
        f1: population
        f2: transportation
        f3: urban quality
        f4: talent base
        f5: industry factors
        f6: ecological factors
        '''
        f1 = ["常住人口五年增长率(%)", "城镇人口增量省域占比", "城区增长潜力"]
        f2 = ["市域高铁站数量", "市域高速公路出入口数量"]
        f3 = ["公园绿地面积占城市建设用地比重(%)", "全市星巴克咖啡店数量", "城镇居民人均可支配收入"]
        f4 = ["全市本科高校数量", "劳动年龄人口数量"]
        f5 = ['生产总值', "城镇就业人员5年增长率"]
        f6 = ["水规模承载力", "适宜建设用地总量"]
        f1_weight = 1
        f2_weight = 1
        f3_weight = 1
        f4_weight = 1
        f5_weight = 1
        f6_weight = 1
        # Scale each indicator's weight by its layer factor (all 1 by default, i.e. the
        # entropy weights are kept unchanged); the weights are not renormalised afterwards
        for obj in self.Weight.index:
            for layer, layer_weight in ((f1, f1_weight), (f2, f2_weight), (f3, f3_weight),
                                        (f4, f4_weight), (f5, f5_weight), (f6, f6_weight)):
                if obj in layer:
                    self.Weight[obj] = self.Weight[obj] * layer_weight
                    break

        '''
        [Final step: adjust the weight of an individual indicator]
        '''
        # self.Weight['常住人口五年增长率(%)'] = 0.03
        return self.Weight


    def calc_score(self):
        self.calc_Weight()

        # Weighted sum of the normalised indicators for each object
        self.score = pd.Series(
            np.dot(self.uniform_mat.values, np.array(self.Weight)),
            index=self.city, name='score'
        )
        # self.score = self.score.sort_values(ascending=False)  # optional: sort by score
        return self.score

if __name__ == '__main__':
    # 1. read the data
    # df = pd.read_csv('shangzhi.csv', encoding='gb2312')
    df = pd.read_excel('E:/砖/河南省国土空间规划/市县人口分配评价数据.xlsx')
    # 2. preprocessing: drop records containing null values
    df = df.dropna()
    # print(df.head())

    # split the data and set the positive / negative indicators
    indexs = ["常住人口五年增长率(%)", "城镇人口增量省域占比", "城区增长潜力", "市域高铁站数量", "市域高速公路出入口数量", "公园绿地面积占城市建设用地比重(%)",
              "全市星巴克咖啡店数量", "城镇居民人均可支配收入", "全市本科高校数量", "劳动年龄人口数量", '生产总值',
              "城镇就业人员5年增长率", "水规模承载力", "适宜建设用地总量"]

    Negative = ["水规模承载力"]
    Positive = ["常住人口五年增长率(%)", "城镇人口增量省域占比", "城区增长潜力", "市域高铁站数量", "市域高速公路出入口数量",
                "公园绿地面积占城市建设用地比重(%)", "全市星巴克咖啡店数量", "城镇居民人均可支配收入", "全市本科高校数量",
                "劳动年龄人口数量", '生产总值', "城镇就业人员5年增长率", "适宜建设用地总量"]

    city = df['名称']
    index = df[indexs]
    em = EmtropyMethod(index, Positive, Negative, city)
    em.calc_Weight().to_csv('E:/砖/河南省国土空间规划/weight.csv', encoding='gbk')
    score = em.calc_score()
    score = score / score.sum()  # normalise the scores so that they sum to 1
    score.to_csv('E:/砖/河南省国土空间规划/score.csv', encoding='gbk')
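If the Excel file is not at hand, the class can be smoke-tested on a small synthetic table; the column names, city names, and values below are made up purely for illustration:

import pandas as pd

# Hypothetical toy data: two positive indicators and one negative indicator
toy = pd.DataFrame({
    'city': ['A', 'B', 'C'],
    'gdp_growth': [6.5, 5.1, 7.2],       # positive
    'hsr_stations': [3, 1, 2],           # positive
    'pollution_index': [80, 60, 95],     # negative
})
em_toy = EmtropyMethod(toy[['gdp_growth', 'hsr_stations', 'pollution_index']],
                       ['gdp_growth', 'hsr_stations'],
                       ['pollution_index'],
                       toy['city'])
print(em_toy.calc_Weight())
print(em_toy.calc_score())

Since none of these columns appear in the f1–f6 lists, the layer adjustment in calc_Weight simply leaves the entropy weights unchanged.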

Dataset link: https://pan.baidu.com/s/1VqXpTNIH7LiEzObGnu-lYw
Extraction code: b1el
