Python数据标准化处理

Python数据标准化处理

归一化,Z-Score法

import numpy as np
import pandas as pd
import copy
import jenkspy
# Negative (cost-type) indicator: larger raw values map to smaller scores.
def minmaxNormalization(x):
    """Rescale ``x`` to the range [0, 1] with the orientation reversed.

    Each element becomes ``(max - x) / (max - min)``, so the maximum maps
    to 0 and the minimum maps to 1.

    Args:
        x: Numeric array-like accepted by ``np.max`` and ``np.min``
            (for example a NumPy array or a pandas Series).

    Returns:
        The rescaled values. When all elements are equal the range is
        zero and the formula is undefined; zeros are returned in that
        case instead of dividing by zero.
    """
    upper = np.max(x)
    span = upper - np.min(x)
    distance_from_max = upper - x
    if span == 0:
        # Constant input: avoid 0/0, which would yield NaN.
        return distance_from_max * 0.0
    return distance_from_max / span
# Positive (benefit-type) indicator: larger raw values map to larger scores.
def maxminNormalization(x):
    """Rescale ``x`` to the range [0, 1].

    Each element becomes ``(x - min) / (max - min)``, so the minimum maps
    to 0 and the maximum maps to 1.

    Args:
        x: Numeric array-like accepted by ``np.max`` and ``np.min``
            (for example a NumPy array or a pandas Series).

    Returns:
        The rescaled values. When all elements are equal the range is
        zero and the formula is undefined; zeros are returned in that
        case instead of dividing by zero.
    """
    lower = np.min(x)
    span = np.max(x) - lower
    distance_from_min = x - lower
    if span == 0:
        # Constant input: avoid 0/0, which would yield NaN.
        return distance_from_min * 0.0
    return distance_from_min / span
# Z-score standardization.
def zscoreNormalization(x):
    """Standardize ``x`` as ``(x - mean) / std``.

    The mean and standard deviation are whatever ``np.mean`` and
    ``np.std`` return for ``x``.

    Args:
        x: Numeric array-like accepted by ``np.mean`` and ``np.std``.

    Returns:
        The standardized values. When ``np.std(x)`` is exactly zero the
        formula is undefined; zeros are returned in that case instead of
        dividing by zero.
    """
    spread = np.std(x)
    centered = x - np.mean(x)
    if spread == 0:
        # No variation in the input: avoid 0/0, which would yield NaN.
        return centered * 0.0
    return centered / spread
# Concrete normalization script: read sheet 'Sheet1' of the workbook into df.
df = pd.read_excel('D:\\Desktop\\1204.xlsx', sheet_name='Sheet1')

# Positive (benefit-type) indicators: larger raw values map to larger scores.
def maxminNormalization(*x):
    """Min-max normalize the named columns of the module-level ``df`` in place.

    Every listed column is replaced by ``(value - min) / (max - min)``.
    Defining this function rebinds the name ``maxminNormalization``, which
    an earlier definition in this file also uses.

    Args:
        *x: Column labels of ``df`` to normalize.

    Returns:
        None. ``df`` is modified in place.
    """
    for column in x:
        value = df.loc[:, column]
        low = np.min(value)
        span = np.max(value) - low
        shifted = value - low
        # A constant column has zero range; store zeros rather than 0/0 (NaN).
        df.loc[:, column] = shifted * 0.0 if span == 0 else shifted / span
# Negative (cost-type) indicators: larger raw values map to smaller scores.
def minmaxNormalization(*x):
    """Reverse min-max normalize the named columns of the module-level ``df`` in place.

    Every listed column is replaced by ``(max - value) / (max - min)``.
    Defining this function rebinds the name ``minmaxNormalization``, which
    an earlier definition in this file also uses.

    Args:
        *x: Column labels of ``df`` to normalize.

    Returns:
        None. ``df`` is modified in place.
    """
    for column in x:
        value = df.loc[:, column]
        high = np.max(value)
        span = high - np.min(value)
        inverted = high - value
        # A constant column has zero range; store zeros rather than 0/0 (NaN).
        df.loc[:, column] = inverted * 0.0 if span == 0 else inverted / span
# Run the normalization functions; the arguments are the indicator column names.
# Positive indicators
maxminNormalization(
    'GDP', 'PPP', 'NEx14', 'AgrGDP', 'TraRely', 'Water',
)
# Negative indicators
minmaxNormalization(
    'AgrLand', 'ForLand', 'Wstress', 'Popu', 'Transport', 'Contig',
)

补充内容:自然断点分类、对数化

# Read sheet 'Sheet1' of the workbook again for the classification step.
df = pd.read_excel('D:\\Desktop\\1204.xlsx', sheet_name='Sheet1')

# Classify data with the natural breaks method.
def getBreakNum(breaks, val):
    """Return the 1-based number of the first interval that contains ``val``.

    Interval ``k`` spans ``breaks[k - 1]`` to ``breaks[k]`` with both ends
    inclusive, so a value sitting exactly on an interior break is assigned
    to the lower interval.

    Args:
        breaks: Sequence of break values supporting ``len`` and indexing.
        val: Value to classify.

    Returns:
        The interval number, or None when ``val`` lies in no interval.
    """
    for class_number in range(1, len(breaks)):
        if breaks[class_number - 1] <= val <= breaks[class_number]:
            return class_number
    return None
          
# Work on a deep copy so the class numbers are written to classify_df, not df.
classify_df = copy.deepcopy(df)

# Classify the columns at positions 0..36 one at a time.
# NOTE(review): 37 is hard-coded — presumably the number of indicator columns
# in the workbook; confirm against the sheet layout.
for colume in range(37): 
    colval = df.iloc[:,colume]
    # Break values for this column, requested for 7 classes.
    # NOTE(review): newer jenkspy releases appear to name this keyword
    # n_classes — confirm against the installed version.
    breaks = jenkspy.jenks_breaks(colval,nb_class=7) # number of classes: nb_class
    colval2 = copy.deepcopy(colval)
    # Overwrite each value with the class number returned by getBreakNum.
    # NOTE(review): colval2[i] indexes the Series by key i, which presumably
    # relies on a default 0..n-1 index — confirm.
    for i,val in enumerate(colval2):
        colval2[i] = getBreakNum(breaks,val)
    classify_df.iloc[:,colume] = colval2

# Write the classified table to Excel without the index column.
classify_df.to_excel('D:\\Desktop\\1129指标分级.xlsx', index = False)
# Log transform
df = pd.read_csv('C:\\Users\\Richard_Chen\\Desktop\\loghot.csv')
# Apply np.log1p to the columns at positions 1 through 18 (slice 1:19);
# column 0 is left untouched.
df.iloc[:,1:19] = df.iloc[:,1:19].apply(np.log1p)
# NOTE(review): no index argument is passed here, unlike the to_excel calls in
# this file that use index = False — confirm whether the index column is
# wanted in the output CSV.
df.to_csv('C:\\Users\\Richard_Chen\\Desktop\\loghot_r.csv')
# Save the processed table under a file name stamped with the current time
# (month, day, hour, minute).
from datetime import datetime  # class used to build the timestamp

path = "D:\\"
filename = f"Output_{datetime.now().strftime('%m%d%H%M')}.xls"
newpath = f"{path}{filename}"
# index=False leaves out the row index; header=True writes the column names.
classify_df.to_excel(newpath, header=True, index=False)

你可能感兴趣的:(python)