在上一篇博客中,我简单地使用了scipy模块进行了统计学中三大相关性分析方法(皮尔森相关性系数、斯皮尔曼相关性系数、肯德尔相关性系数)的使用,这里的主要工作是将相关性计算结果进行可视化展示,以便于更直观地来对不同因子之间的相关性进行分析,下面是具体的实践:
#!usr/bin/env python
#encoding:utf-8
'''
__Author__:沂水寒城
功能: python实践统计学中的三大相关性系数,并绘制相关性分析的热力图
'''
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pearsonr,spearmanr
def randomPlot():
'''
构造随机数矩阵来绘制热力图
'''
data=np.random.rand(8,8)
print(data)
fig,ax=plt.subplots(figsize=(10,10))
key_list=['A','B','C','D','E','F','G','H']
sns.heatmap(pd.DataFrame(np.round(data,4),columns=key_list,index=key_list),annot=True,vmax=1,vmin=0,xticklabels=True,
yticklabels=True,square=True,cmap="YlGnBu")
ax.set_title(' Heat Map ', fontsize=18)
ax.set_ylabel('Y', fontsize=18)
ax.set_xlabel('X', fontsize=18)
plt.savefig('Random.png')
def dataPlot():
'''
基于相关性系数计算结果来绘制
'''
data1=[[1.0, 0.679138216972867, 0.8723347117585479, 0.8678591195124671, 0.9091565423610072, -0.681888271075189, -0.7228630059512009, -0.681888271075189], [0.679138216972867, 1.0, 0.7808137626700863, 0.45749648218056993, 0.5348626411851899, -0.44326149230111495, -0.39558814315535173, -0.44326149230111495], [0.8723347117585479, 0.7808137626700863, 1.0, 0.7020628568851344, 0.7658545326255977, -0.7217424383082518, -0.5875065812285509, -0.7217424383082518], [0.8678591195124671, 0.45749648218056993, 0.7020628568851344, 1.0, 0.9903366506522474, -0.5874500887255807, -0.8382341966002466, -0.5874500887255807], [0.9091565423610072, 0.5348626411851899, 0.7658545326255977, 0.9903366506522474, 1.0, -0.6439505593700681, -0.8244259230387567, -0.6439505593700681], [-0.681888271075189, -0.44326149230111495, -0.7217424383082518, -0.5874500887255807, -0.6439505593700681, 1.0, 0.4821038714558963, 1.0], [-0.7228630059512009, -0.39558814315535173, -0.5875065812285509, -0.8382341966002466, -0.8244259230387567, 0.4821038714558963, 1.0, 0.4821038714558963], [-0.681888271075189, -0.44326149230111495, -0.7217424383082518, -0.5874500887255807, -0.6439505593700681, 1.0, 0.4821038714558963, 1.0]]
data2=[[0.9999999999999999, 0.8345554576233073, 0.9243854856819417, 0.9330027648545496, 0.9482209505391218, -0.6482501837911674, -0.8818546914603946, -0.6482501837911674], [0.8345554576233073, 0.9999999999999999, 0.9003585423988755, 0.7649830893396171, 0.7979521001324441, -0.4691190784521079, -0.6480284658216039, -0.4691190784521079], [0.9243854856819417, 0.9003585423988756, 1.0, 0.8506386730833658, 0.8830994621736679, -0.7006298478257242, -0.7676395373967932, -0.7006298478257242], [0.9330027648545496, 0.7649830893396171, 0.8506386730833659, 0.9999999999999999, 0.9951726440650351, -0.5278605167290854, -0.9273089460158745, -0.5278605167290854], [0.9482209505391219, 0.7979521001324442, 0.8830994621736679, 0.9951726440650353, 1.0, -0.5724194583833783, -0.9112198965249181, -0.5724194583833783], [-0.6482501837911674, -0.46911907845210793, -0.7006298478257242, -0.5278605167290855, -0.5724194583833783, 1.0, 0.49515922475047763, 1.0], [-0.8818546914603947, -0.6480284658216039, -0.7676395373967932, -0.9273089460158744, -0.9112198965249181, 0.49515922475047763, 1.0, 0.49515922475047763], [-0.6482501837911674, -0.46911907845210793, -0.7006298478257242, -0.5278605167290855, -0.5724194583833783, 1.0, 0.49515922475047763, 1.0]]
data3=[[1.0, 0.6447733742285494, 0.7648449086941359, 0.7781871747188993, 0.8077705036447606, -0.45314582645069557, -0.7063398069618333, -0.45314582645069557], [0.6447733742285494, 1.0, 0.723107089412363, 0.5724058545087037, 0.6071562414937144, -0.3151362329600627, -0.47203186715609546, -0.3151362329600627], [0.7648449086941359, 0.7231070894123629, 1.0, 0.6636168802979515, 0.7036503531839524, -0.5094145396347782, -0.5801992046091554, -0.5094145396347782], [0.7781871747188993, 0.5724058545087037, 0.6636168802979514, 1.0, 0.9417388451103401, -0.35110754201106087, -0.818036667286979, -0.35110754201106087], [0.8077705036447606, 0.6071562414937144, 0.7036503531839524, 0.9417388451103401, 1.0, -0.3871446641295609, -0.7851151107221745, -0.3871446641295609], [-0.45314582645069557, -0.3151362329600627, -0.5094145396347782, -0.35110754201106087, -0.3871446641295609, 1.0, 0.331222719795913, 1.0], [-0.7063398069618333, -0.4720318671560954, -0.5801992046091554, -0.818036667286979, -0.7851151107221744, 0.331222719795913, 1.0, 0.331222719795913], [-0.45314582645069557, -0.3151362329600627, -0.5094145396347782, -0.35110754201106087, -0.3871446641295609, 1.0, 0.331222719795913, 1.0]]
data1,data2,data3=np.array(data1),np.array(data2),np.array(data3)
fig,ax=plt.subplots(figsize=(10,10))
key_list=['A','B','C','D','E','F','G','H']
sns.heatmap(pd.DataFrame(np.round(data1,4),columns=key_list,index=key_list),annot=True,vmax=1,vmin=0,xticklabels=True,
yticklabels=True,square=True,cmap="YlGnBu")
ax.set_title(' Heat Map ', fontsize=18)
ax.set_ylabel('Y', fontsize=18)
ax.set_xlabel('X', fontsize=18)
plt.savefig('data1.png')
if __name__=='__main__':
randomPlot()
dataPlot()
其中,随机生成数矩阵得到的热力图如下:
基于相关性计算结果得到的三个热力图分别如下:
data1.png
data2.png
从上面三个热力图可以看到:虽然三者的计算结果有些偏差,但是整体的趋势都是相同的。