目录
用pandas计算相关系数
用numpy计算相关系数
自己编写函数计算相关系数
用pandas计算相关系数。比如我想知道风速大小与风向紊乱程度(用标准差来衡量)之间的相关系数,下面是代码:
import pandas as pd
import pylab as plt
#每小时的阵风风速平均值
all_gust_spd_mean_list = [8.21529411764706, 7.872941176470587, 7.829411764705882, 8.354117647058825, 9.025882352941174, 9.384523809523811, 9.57294117647059, 9.274117647058821, 9.050588235294118, 9.314117647058827, 8.924705882352939, 9.25176470588235, 8.978823529411764, 8.39176470588235, 7.715294117647061, 7.477647058823529, 7.272941176470586, 7.38470588235294, 7.396470588235295, 7.97261904761905, 7.716666666666666, 7.7809523809523835, 7.816666666666668, 7.897590361445783, 8.200000000000001, 8.04761904761905, 7.474999999999999, 9.855952380952383, 11.120000000000001, 10.979761904761906, 10.922619047619051, 10.841176470588234, 9.31566265060241, 8.867058823529415, 9.068235294117642, 8.774698795180722, 8.629411764705884, 8.292941176470586, 7.640000000000007, 7.422352941176469, 7.464705882352944, 8.210588235294113, 8.558823529411763, 8.93095238095238, 9.001176470588234, 8.538095238095238, 8.965882352941172, 9.855294117647057, 8.318918918918921, 9.217647058823525, 8.86470588235294, 8.840000000000002, 9.44235294117647, 9.352380952380953, 9.307058823529408, 9.64047619047619, 9.408333333333333, 9.585882352941175, 8.901190476190477, 7.698823529411764, 7.988235294117645, 9.091764705882353, 9.294117647058819, 8.996470588235297, 9.63764705882353, 9.091764705882353, 8.937647058823533, 8.838823529411764, 8.637647058823534, 8.46, 8.374117647058824, 8.24117647058823, 8.245238095238093, 8.365882352941174, 8.50235294117647, 8.291764705882352, 8.088235294117647, 7.889411764705883, 7.594117647058826, 7.216470588235293, 7.097647058823533, 7.305882352941181, 7.489411764705882, 6.815294117647058, 7.971428571428569, 7.424705882352936, 6.910588235294117, 6.071764705882354, 7.44117647058823, 7.667857142857143, 7.881176470588237, 7.929411764705881, 8.12142857142857, 8.822352941176472, 9.083529411764703, 9.028235294117646, 9.310714285714285, 9.035294117647057, 8.450588235294116, 8.414285714285713, 7.311764705882355, 6.840000000000001, 7.238095238095239, 6.641176470588236, 6.8047619047619055, 
6.58705882352941, 6.826190476190474, 6.568235294117643, 7.060000000000001, 7.686904761904761, 8.348235294117643, 8.503529411764701, 8.287058823529414, 8.354117647058823, 7.624705882352941, 7.286904761904765, 7.361176470588235, 7.477647058823531, 7.343529411764706]
#每小时的阵风风向标准差
all_gust_agl_dev_list = [0.7507438242046189, 0.768823513771462, 0.849877567310481, 0.8413581558472801, 0.8571319461950748, 0.8665002025305942, 0.9053739533298005, 0.8866979720735791, 0.8045677876888446, 0.873463882661469, 0.832383480871403, 0.778659970340069, 0.7357031045047981, 0.7974723911258534, 0.8039727543149432, 0.8709723763624072, 0.8727745464337923, 0.7896422160341138, 0.8165093346129041, 0.8821296270775546, 0.9193591477905156, 0.8546566314487358, 0.8595040204296921, 0.8075641299052398, 0.7996745617071098, 0.7930869411601498, 0.7578880032016914, 0.9107571156507569, 0.8461201382346486, 0.7553646348127085, 0.8510861123303187, 0.7282631202385544, 0.8588017730198183, 0.7923449370076744, 0.8265083209111689, 0.9599970229643688, 0.8195276021290412, 0.7882592259148272, 0.8036464793287409, 0.8237184691421926, 0.8846862360656914, 0.8136869244513337, 0.8516383375155133, 0.7760301715652644, 0.8644231334629017, 0.831330440569484, 0.8061342111854616, 0.7345896810176235, 1.205089147978776, 0.8266315966774649, 0.8137345300107962, 0.8186966603954983, 0.7836182115343135, 0.8406438908681332, 0.7717723331806998, 0.7932664155269176, 0.7266183593077442, 0.719063143819583, 0.8846434855533486, 0.817552510948495, 0.7571575934024827, 0.865326265251608, 0.9099784335052563, 0.8591794583996128, 0.9295389095340467, 0.8787300860744375, 0.8724277968300532, 0.95284132003256, 0.9288772059881606, 0.8690944948691984, 0.8327213470469693, 0.8339075062700629, 0.886835675339985, 0.8439137877550847, 0.7985495396895048, 0.8406267016063169, 0.8477871130878305, 0.8844025576348077, 0.9186363354492758, 0.8888539157167654, 0.9079462071375304, 0.8699806402308554, 0.8531937701209343, 0.8833108936555343, 0.9317958602705915, 0.9393618445471649, 0.9556065912926689, 0.967220118643412, 0.8882194173154115, 0.9361538853249073, 0.7872261833965604, 0.8608377368219552, 0.8787718518619395, 0.8169189082396561, 0.7965901553530427, 0.8838665737610132, 0.8844338861256802, 0.9008484784943429, 0.8612318707072047, 
0.8623792153658019, 1.0033494995180463, 0.9901213381586231, 0.8780115045650467, 0.9172682690843976, 0.9653905755824115, 0.9199829176728873, 0.9180048223906779, 0.9172043382441968, 0.9267783259554074, 0.9231225672912022, 0.7945054721199195, 0.8655558517080688, 0.8306327906597787, 0.8457559701865576, 0.8038459124570336, 0.8519646989317945, 0.7735358658599594, 0.8612134954656397, 0.8879135146161856]
# Wrap the two plain lists in pandas Series so the Series API can be used.
g_s_m = pd.Series(all_gust_spd_mean_list)
g_a_d = pd.Series(all_gust_agl_dev_list)

# Correlation coefficient between the two series (not a standard deviation),
# rounded to four decimal places.
corr_gust = round(g_s_m.corr(g_a_d), ndigits=4)
print('corr_gust :', corr_gust)

# Scatter plot of one list against the other, titled with the coefficient,
# to compare the visual impression with the computed value.
plt.scatter(x=all_gust_spd_mean_list, y=all_gust_agl_dev_list)
plt.title(f'corr_gust :{corr_gust!s}', fontproperties='SimHei')
plt.show()
根据以上程序,得到结果:
corr_gust : -0.3481
以及图片:
参考:http://www.30daydo.com/article/178
此外,还可以计算多个列表的相关系数矩阵,即多个列表两两之间的相关系数
import pandas as pd
import numpy as np
if __name__ == '__main__':
unstrtf_lst = [[2.136, 1.778, 1.746, 2.565, 1.873, 2.413, 1.813, 1.72, 1.932, 1.987, 2.035, 2.178, 2.05, 2.016, 1.645, 1.756, 1.886, 2.106, 2.138, 1.914, 1.984, 1.906, 1.871, 1.939, 1.81, 1.93, 1.898, 1.802, 2.008, 1.724, 1.823, 1.636, 1.774, 2.055, 1.934, 1.629, 2.519, 2.093, 2.004, 1.793, 1.564, 1.962, 2.176, 1.846, 1.816, 2.018, 1.708, 2.465, 1.899, 1.523, 1.41, 2.102, 2.065, 2.402, 2.091, 1.867, 1.77, 1.466, 2.029, 1.659, 1.626, 1.977, 1.837, 2.13, 2.241, 2.184, 2.345, 1.833, 2.113, 1.764, 1.859, 1.868, 1.835, 1.906, 2.237, 1.846, 1.871, 1.769, 1.928, 1.831, 1.875, 2.039, 2.24, 1.835, 1.851]
, [2.171, 1.831, 1.714, 2.507, 1.793, 2.526, 1.829, 1.705, 1.954, 2.017, 2.022, 2.16, 2.059, 1.966, 1.661, 1.752, 1.884, 2.203, 2.182, 1.97, 2.003, 1.875, 1.852, 1.884, 1.774, 1.916, 1.936, 1.809, 1.926, 1.717, 1.841, 1.59, 1.781, 2.016, 1.898, 1.657, 2.458, 2.134, 2.032, 1.785, 1.575, 1.959, 2.11, 1.854, 1.826, 1.992, 1.706, 2.419, 1.854, 1.514, 1.37, 2.084, 2.024, 2.398, 1.955, 1.859, 1.759, 1.441, 2.059, 1.653, 1.583, 1.987, 1.84, 2.106, 2.262, 2.13, 2.371, 1.776, 2.117, 1.733, 1.814, 1.839, 1.822, 1.883, 2.23, 1.803, 1.894, 1.783, 1.911, 1.813, 1.85, 2.004, 2.191, 1.823, 1.809]
, [2.157, 1.873, 1.802, 2.761, 1.733, 2.506, 1.842, 1.765, 1.938, 2.058, 1.932, 2.196, 2.004, 2.126, 1.664, 1.698, 1.899, 2.073, 2.117, 2.083, 1.972, 1.969, 1.865, 1.937, 1.752, 1.939, 1.927, 1.804, 2.07, 1.725, 1.846, 1.5, 1.804, 2.1, 1.932, 1.773, 2.431, 2.088, 2.08, 1.812, 1.592, 1.953, 2.044, 2.019, 1.846, 2.061, 1.771, 2.254, 1.891, 1.536, 1.356, 1.952, 2.222, 2.427, 2.015, 1.873, 1.79, 1.384, 1.981, 1.665, 1.815, 2.006, 1.869, 2.102, 2.249, 2.27, 2.296, 1.814, 2.099, 1.702, 1.688, 1.89, 1.82, 1.927, 2.162, 1.825, 1.998, 1.811, 2.0, 1.842, 1.793, 2.115, 2.301, 1.789, 1.826]
, [2.127, 1.744, 1.747, 2.548, 1.939, 2.296, 1.808, 1.71, 1.901, 1.906, 2.074, 2.167, 2.113, 2.044, 1.632, 1.821, 1.94, 2.076, 2.114, 1.837, 1.978, 1.904, 1.872, 1.98, 1.886, 1.923, 1.875, 1.799, 1.992, 1.704, 1.812, 1.715, 1.756, 2.061, 1.94, 1.554, 2.592, 2.065, 1.983, 1.802, 1.57, 1.955, 2.215, 1.765, 1.796, 2.006, 1.662, 2.573, 1.915, 1.543, 1.439, 2.16, 2.012, 2.42, 2.268, 1.886, 1.767, 1.527, 2.073, 1.65, 1.567, 2.016, 1.819, 2.153, 2.225, 2.237, 2.327, 1.877, 2.115, 1.804, 1.939, 1.867, 1.84, 1.905, 2.302, 1.883, 1.798, 1.725, 1.893, 1.846, 1.916, 2.025, 2.268, 1.867, 1.877]
, [2.089, 1.664, 1.72, 2.441, 2.031, 2.321, 1.773, 1.702, 1.935, 1.968, 2.119, 2.191, 2.023, 1.925, 1.621, 1.75, 1.822, 2.074, 2.139, 1.764, 1.982, 1.873, 1.895, 1.955, 1.829, 1.945, 1.853, 1.794, 2.046, 1.75, 1.793, 1.741, 1.752, 2.042, 1.965, 1.532, 2.598, 2.086, 1.923, 1.771, 1.517, 1.98, 2.338, 1.743, 1.794, 2.014, 1.693, 2.618, 1.938, 1.5, 1.476, 2.216, 2.003, 2.361, 2.13, 1.85, 1.764, 1.513, 2.001, 1.669, 1.538, 1.897, 1.819, 2.163, 2.226, 2.099, 2.386, 1.865, 2.121, 1.818, 2.0, 1.876, 1.858, 1.908, 2.254, 1.874, 1.791, 1.759, 1.908, 1.822, 1.944, 2.012, 2.201, 1.863, 1.892]
]
# Column labels, paired in order with the inner lists of unstrtf_lst.
column_lst = ['whole_year', 'spring', 'summer', 'autumn', 'winter']

# Map each label to its list of values; this dict is what the DataFrame
# is built from.
data_dict = dict(zip(column_lst, unstrtf_lst))
unstrtf_df = pd.DataFrame(data_dict)

# Pairwise correlation coefficients between every two columns, as a matrix.
cor1 = unstrtf_df.corr()
print(cor1)
print(list(unstrtf_df.columns))
结果如下:
whole_year spring summer autumn winter
whole_year 1.000000 0.986011 0.943254 0.980358 0.965415
spring 0.986011 1.000000 0.944394 0.945710 0.930887
summer 0.943254 0.944394 1.000000 0.876008 0.833568
autumn 0.980358 0.945710 0.876008 1.000000 0.977426
winter 0.965415 0.930887 0.833568 0.977426 1.000000
['whole_year', 'spring', 'summer', 'autumn', 'winter']
[Finished in 0.5s]
用numpy计算相关系数:这里不再具体举例子了,直接上函数:
# u1 is a matrix: build it yourself or take it from a DataFrame, e.g. u1=a_df.values
np.corrcoef(u1) # correlation coefficients between the rows of u1
np.corrcoef(u1.T) # transposing first gives the correlation coefficients between the columns of u1
np.around(np.corrcoef(u1), decimals=3) # the row-wise matrix again, rounded to 3 decimal places
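自己编写函数计算相关系数。相关系数计算公式:
$$ r_{ab} = \frac{\sum_{i=1}^{n}(a_i-\bar{a})(b_i-\bar{b})}{\sqrt{\sum_{i=1}^{n}(a_i-\bar{a})^{2}\,\sum_{i=1}^{n}(b_i-\bar{b})^{2}}} $$
其中 $\bar{a}$、$\bar{b}$ 分别是两个列表的平均值,$n$ 是列表长度。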
import pandas as pd
import math
# 函数:计算相关系数
def calc_corr(a, b):
    """Return the Pearson correlation coefficient of two sequences.

    Args:
        a: Sized iterable of numbers.
        b: Sized iterable of numbers with the same length as ``a``.

    Returns:
        The correlation coefficient as a float, or ``nan`` when either
        input has zero variance (the coefficient is undefined there).

    Raises:
        ValueError: If the inputs differ in length or are empty.
    """
    n = len(a)
    # zip() would silently drop the tail of the longer input while the means
    # were still taken over the full sequences, giving a wrong result.
    if n != len(b):
        raise ValueError(
            f"inputs must have the same length, got {n} and {len(b)}"
        )
    if n == 0:
        raise ValueError("inputs must not be empty")

    a_avg = sum(a) / n
    b_avg = sum(b) / n
    # Numerator: covariance. The 1/n factor of the covariance formula is
    # omitted because it cancels against the same factor in the denominator.
    cov_ab = sum((x - a_avg) * (y - b_avg) for x, y in zip(a, b))
    # Denominator: square root of the product of the two variances, with the
    # 1/n factors likewise omitted.
    sq = math.sqrt(
        sum((x - a_avg) ** 2 for x in a) * sum((y - b_avg) ** 2 for y in b)
    )
    # A constant input has zero variance; return nan rather than dividing by 0.
    if sq == 0:
        return math.nan
    return cov_ab / sq
if __name__ == '__main__':
a=[2.1653572007579593, 1.6883696588873887, 1.651425407801895, 2.2299854374330415, 1.7922306220578152, 2.113529406925977, 1.8072576529258022, 1.6619459785959674, 1.8433349117064848, 1.830156003014785, 1.995333114793997, 2.1119786058625314, 2.0885749238172453, 2.0352203568050093, 1.6179657744312377, 1.8171419211111066, 1.7958509222039798, 2.0635601390477394, 2.1215391543655637, 1.8370107139324998, 1.9080529013404595, 1.8142460361148207, 1.8540680605856414, 1.8875508126623393, 1.831566482733203, 1.8780989145241431, 1.8510142426569105, 1.7663870994315451, 1.8119839179642034, 1.6843605863881175, 1.7955302280877627, 1.636906960652483, 1.7194807648617405, 1.9658394488708448, 1.898416417616442, 1.4759321604160809, 2.323803661481871, 1.8769469045284484, 1.8722184196962555, 1.7572526764029732, 1.5557166087197607, 1.8797685204289134, 2.121226143459225, 1.6642017944077512, 1.7472961612960178, 1.9730916979451159, 1.599670318309796, 2.571816259771537, 1.865138228024494, 1.5000996232338855, 1.3928618724470463, 2.2495996610383964, 1.8123728869502833, 2.38651467726676, 2.1662270090414606, 1.849308415655705, 1.723236705400253, 1.526968232018129, 2.073685760455388, 1.6217330036091566, 1.5393098152901363, 1.934097094067261, 1.7724313817029405, 2.0884179221557866, 2.07292956021531, 2.0699873046153954, 2.2232673322791827, 1.7725066092979982, 2.0685477779177055, 1.7725974367148223, 1.9166295058392768, 1.7617609193517068, 1.813075132807376, 1.8257878154680307, 2.2437538529398444, 1.7486220604934224, 1.7108204176980777, 1.7008700704044992, 1.7656760907642393, 1.7865088763312098, 1.8197007789917394, 2.029308787047278, 2.1153740255955116, 1.8164832177052999, 1.7631578858166328]
b=[2.060472329023772, 1.8229873524889462, 1.8340939384974153, 2.7992607339895117, 1.6386430473722386, 2.6863735005361127, 1.837418820905206, 1.6839633919848138, 1.9324516095906468, 2.202357206559493, 1.8845930752558508, 2.236205151912301, 1.8721339205267113, 2.210677601843676, 1.600486811395212, 1.5959949497266086, 1.6935368457211848, 1.9225683452842803, 2.070028267879934, 2.21026368629012, 1.9037935496384224, 1.9327073799581955, 1.829655016157694, 1.871952490421074, 1.7077577799677457, 1.930869014924102, 1.9194158360266231, 1.756182345097486, 2.1850192756376896, 1.7382175288447934, 1.8396075766762512, 1.4219634956892804, 1.8415616922656013, 2.0954072448607093, 1.9126234702257543, 1.9193927496754895, 2.3827058942496806, 1.9648325128486095, 2.0220582287578885, 1.7979299492052836, 1.5496290364943117, 1.910875647672739, 1.9842615165051285, 2.1100210075512824, 1.803544011837867, 2.232027673815636, 1.8934372137054605, 2.1627154441076937, 1.8748707756291958, 1.4668995002228604, 1.3246334541267288, 1.8396494252805005, 2.3688123714848675, 2.336307853359804, 1.9091101911129924, 1.8584589874801458, 1.7573810859876628, 1.2926901611210995, 1.9610407369359222, 1.6523460064563988, 1.754942441907064, 1.9320536352480018, 1.8012104839042546, 2.036906849409057, 2.1802647786624125, 2.191382376122134, 2.2989606091839114, 1.7623619697993311, 2.0639438073684104, 1.6271644912042054, 1.582459595381037, 1.8995793224027187, 1.7507677090017424, 1.9975456593566516, 2.008282198102043, 1.8218912714780746, 2.11945852516168, 1.8031669408615743, 2.175089825880799, 1.8075893333263815, 1.7588846094594992, 2.0752823056821317, 2.1812895620089714, 1.7186172119942524, 1.8537786391164277]
# Reference value: let pandas compute the correlation of the two lists.
a_s = pd.Series(a)
b_s = pd.Series(b)
cor1 = a_s.corr(other=b_s)
# The same quantity from the hand-written calc_corr function.
cor2 = calc_corr(a, b)
# Print both values side by side; they are expected to agree.
print(cor1, cor2)