分类数据可视化 - 统计图
barplot() / countplot() / pointplot()
1. barplot()
# Bar plot - point estimate (bar height) with a confidence-interval error bar
# Confidence interval: sample mean ± sampling error
示例1:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context('paper')

# Load the Titanic passenger dataset.
titanic = sns.load_dataset('titanic')
print(titanic.head())

# Survival rate per sex, split by ticket class, with error bars.
# NOTE: newer seaborn releases deprecate `ci` (replaced by `errorbar`) and
# `errcolor` / `errwidth` (replaced by `err_kws`). The legacy keywords are
# kept so the example still runs on the older seaborn it was written for.
sns.barplot(
    x='sex', y='survived', hue='class', data=titanic,
    palette='hls',
    order=['male', 'female'],     # categories to draw, in this order
    capsize=0.05,                 # width of the caps on the error bars
    saturation=1,                 # colour saturation; documented range is 0-1
                                  # (was 8, which is out of range)
    errcolor='gray', errwidth=2,  # error-bar colour and line width
    ci='sd',                      # error-bar size: a number in 0-100, 'sd', or None
)

# Reproduce the plotted numbers: bar height (mean) and error-bar size (std).
# The column is selected *before* aggregating: aggregating the whole frame
# first raises TypeError on pandas >= 2.0 because of the non-numeric columns,
# and needlessly computes statistics for every column on older versions.
print(titanic.groupby(['sex', 'class'])['survived'].mean())
print(titanic.groupby(['sex', 'class'])['survived'].std())
示例2:
# Load the restaurant tips dataset.
tips = sns.load_dataset('tips')

# Mean total bill per day, split by sex; white edges separate adjacent bars.
sns.barplot(
    x='day', y='total_bill', hue='sex', data=tips,
    palette='Blues', edgecolor='w',
)

# Group means behind the plot. `numeric_only=True` restricts the aggregation
# to numeric columns; without it pandas >= 2.0 raises TypeError on the
# categorical `smoker` / `time` columns.
tips.groupby(['day', 'sex']).mean(numeric_only=True)
示例3:
# Load the data, ordered from the highest to the lowest total.
crashes = sns.load_dataset('car_crashes')
crashes = crashes.sort_values('total', ascending=False)

# Create a tall figure; the horizontal bars below are drawn on it.
f, ax = plt.subplots(figsize=(6, 15))

# Two overlaid horizontal bar layers sharing the same y categories.
# Each entry is (colour-code palette, x column, legend label); the layers are
# drawn in this order, so the second one is painted on top of the first.
bar_layers = (
    ('pastel', 'total', 'Total'),
    ('muted', 'alcohol', 'Alcohol-involved'),
)
for code_palette, x_column, legend_label in bar_layers:
    # Re-bind the single-letter colour codes so 'b' resolves to this
    # palette's blue.
    sns.set_color_codes(code_palette)
    sns.barplot(
        x=x_column, y='abbrev', data=crashes,
        label=legend_label, color='b', edgecolor='w',
    )

# Two-column legend in the lower-right corner; drop the left and bottom spines.
ax.legend(ncol=2, loc='lower right')
sns.despine(left=True, bottom=True)
2. countplot()
# Count bar plot: bar height is the number of observations in each category.
# x / y --> pass the categorical column as x or as y to choose the axis the
#           categories are laid out on (bar orientation).
# Usage is otherwise similar to barplot.
sns.countplot(x='class', hue='who', data=titanic, palette='magma')

# pointplot example: mean total bill per meal time, split by smoker status.
# NOTE: newer seaborn releases deprecate `join`; it is kept so the example
# still runs on the older seaborn it was written for.
sns.pointplot(
    x='time', y='total_bill', hue='smoker', data=tips,
    palette='hls',
    dodge=True,               # offset the hue levels so points do not overlap
    join=True,                # connect the points with lines
    markers=['o', 'x'],       # one marker style per hue level
    linestyles=['-', '--'],   # one line style per hue level
)

# Group means behind the plot. The column is selected *before* aggregating:
# aggregating the whole frame first raises TypeError on pandas >= 2.0 because
# of the non-numeric columns.
tips.groupby(['time', 'smoker'])['total_bill'].mean()
3. pointplot()
#折线图 - 置信区间估计