在进行数据分析的时候,图形可以帮助我们更直观地了解数据形态。那么常用的图形都有哪些?这些图形要怎么绘制?今天我们先学习如何绘制 heatmap(热力图)和 pairplot 这两种图形,它们可以更直观地表示两个变量之间的相关性。
import numpy as np
import pandas as pd
from pandas import DataFrame as df
from sklearn.datasets import load_boston
from matplotlib import pyplot as plt
import seaborn as sns
## Load the data
# NOTE: load_boston was removed in scikit-learn 1.2, so this section needs an
# older scikit-learn release to run.
# Load the dataset once and reuse it instead of calling the loader three times.
boston = load_boston()
data = df(boston.data, columns=boston.feature_names)
data['target'] = boston.target

## Describe the pairwise correlation between variables
coeff = data.iloc[:, :-1].corr()  # correlation matrix of the features (target column excluded)

# sns.heatmap draws into the currently active Axes when `ax` is not given, so
# each example below opens its own figure; otherwise the heatmaps (and their
# colorbars) would pile up on top of each other when run as a single script.
plt.figure()
sns.heatmap(coeff)

# vmin=None, vmax=None: values that anchor the colormap; when omitted they are
# inferred from the data and the other keyword arguments
plt.figure()
sns.heatmap(coeff, vmin=-1, vmax=1)

# cmap=None: colormap to use; names ending in "_r" are the reversed version: Accent, Accent_r, Blues, Blues_r, BrBG, BrBG_r, BuGn, BuGn_r, BuPu, BuPu_r, CMRmap, CMRmap_r, Dark2, Dark2_r, GnBu, GnBu_r, Greens, Greens_r, Greys, Greys_r, OrRd, OrRd_r, Oranges, Oranges_r, PRGn, PRGn_r, Paired, Paired_r, Pastel1, Pastel1_r, Pastel2, Pastel2_r, PiYG, PiYG_r, PuBu, PuBuGn, PuBuGn_r, PuBu_r, PuOr, PuOr_r, PuRd, PuRd_r, Purples, Purples_r, RdBu, RdBu_r, RdGy, RdGy_r, RdPu, RdPu_r, RdYlBu, RdYlBu_r, RdYlGn, RdYlGn_r, Reds, Reds_r, Set1, Set1_r, Set2, Set2_r, Set3, Set3_r, Spectral, Spectral_r, Wistia, Wistia_r, YlGn, YlGnBu, YlGnBu_r, YlGn_r, YlOrBr, YlOrBr_r, YlOrRd, YlOrRd_r
plt.figure()
sns.heatmap(coeff, cmap='YlGnBu')

# center=None: value at which the colormap is centered; changing it shifts how
# dark or light the cells appear
plt.figure()
sns.heatmap(coeff, cmap='YlGnBu', center=0.5)

# linewidths=0: width of the lines separating the cells
plt.figure()
sns.heatmap(coeff, cmap='YlGnBu', linewidths=3)

# linecolor='white': color of the lines separating the cells
plt.figure()
sns.heatmap(coeff, cmap='YlGnBu', linewidths=3, linecolor='black')

# cbar=True: whether to draw the colorbar
plt.figure()
sns.heatmap(coeff, cmap='YlGnBu', cbar=False)

# square=False: when True, every cell is drawn as a square
plt.figure()
sns.heatmap(coeff, cmap='YlGnBu', square=True)

# annot=None: write the data value inside each cell
plt.figure()
sns.heatmap(coeff, cmap='YlGnBu', annot=True)

# annot_kws=None: text settings for the values written inside the cells
plt.figure()
sns.heatmap(coeff, cmap='YlGnBu', annot=True, annot_kws={'size':5,'weight':'bold', 'color':'white'})

# fmt='.2g': string format used for the annotations
plt.figure()
sns.heatmap(coeff, cmap='YlGnBu', annot=True, annot_kws={'size':5,'weight':'bold', 'color':'white'}, fmt='.3f')

# xticklabels='auto', yticklabels='auto': axis tick labels; False hides them,
# and a list of labels may be passed instead
plt.figure()
sns.heatmap(coeff, cmap='YlGnBu', xticklabels=False,yticklabels=False)
pairplot 不仅可以绘制各个变量 X 之间的相关性,也可以展示每个变量 X 与结果 Y 之间的关系。下面我们就以 X 与 Y 的关系为例绘制图形。
from sklearn.datasets import load_breast_cancer
# Load the breast-cancer dataset once and reuse it, rather than calling the
# loader separately for the data, the feature names and the target.
cancer = load_breast_cancer()
data = df(cancer.data, columns=cancer.feature_names)
data['target'] = cancer.target
# Keep only the last five columns (the last four features plus 'target')
data1 = data.iloc[:, -5:]
sns.pairplot(data1)
pairplot 常用参数理解
# Keyword arguments shared by the examples below.
hue_kws = {'hue': 'target'}
styled_kws = {**hue_kws, 'palette': 'YlGnBu'}

# hue=None: column used to split the observations into colored groups
sns.pairplot(data1, **hue_kws)

# hue_order=None: order in which the levels of the hue variable are used
sns.pairplot(data1, hue_order=[1,0], **hue_kws)

# palette=None: colors used for the levels of the hue variable
sns.pairplot(data1, **styled_kws)

# kind='scatter': plot type for the off-diagonal panels; four options:
# scatter, kde, hist, reg
for offdiag_kind in ('kde', 'hist', 'reg'):
    sns.pairplot(data1, kind=offdiag_kind, **styled_kws)

# diag_kind='auto': plot type for the diagonal panels: hist or kde
sns.pairplot(data1, kind='scatter', diag_kind='hist', **styled_kws)

# markers=None: marker style of the scatter points: 'o','*','s','D','p','>'
hue_markers = ['*','s']
sns.pairplot(data1, markers=hue_markers, **styled_kws)

# corner=False: with corner=True only the lower triangle of the grid is drawn
sns.pairplot(data1, corner=True, **styled_kws)

# height=2.5: height of each panel; aspect=1: width relative to the height,
# e.g. 0.5 means width = height * 0.5
sns.pairplot(data1, height=1, **styled_kws)

# x_vars=None, y_vars=None: explicitly choose the columns for the x and y axes
cols_x = ['worst concavity','worst concave points']
cols_y = ['worst symmetry','worst fractal dimension']
sns.pairplot(data1, x_vars=cols_x, y_vars=cols_y, **styled_kws)
详细内容请看我公众号~
【python画图_变量相关性(heatmap、pairplot)】