下四分位数Q1:将序列从小到大排序后平均分为四份,Q1可以描述为median之前两部分的中值(即第25百分位数)
上四分位数Q3:可以描述为median之后两部分的中值(即第75百分位数)
# DataFrame.plot.box() and plt.boxplot()
# DataFrame.plot.box()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Two stacked axes: the upper one gets a vertical box plot, the lower one a
# horizontal box plot of the same DataFrame.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 6))
top_ax, bottom_ax = axes

# 10 random rows for each of the five columns A..E.
df = pd.DataFrame(np.random.rand(10, 5), columns=list('ABCDE'))

# Per-component colours passed to DataFrame.plot.box via `color`:
#   boxes    -> the box outline
#   whiskers -> the lines joining the box to the error-bar caps
#   medians  -> the median line
#   caps     -> the error-bar end lines
color = {
    'boxes': 'DarkGreen',
    'whiskers': 'DarkOrange',
    'medians': 'DarkBlue',
    'caps': 'Gray',
}

# Upper plot: vertical boxes with a fixed y-range and a grid.
df.plot.box(ax=top_ax, color=color, grid=True, ylim=[0, 1.2])

# Lower plot:
#   vert      -> whether the boxes are vertical (default True); False here
#   positions -> where each of the five boxes is placed along the axis
df.plot.box(
    ax=bottom_ax,
    color=color,
    grid=True,
    vert=False,
    positions=[1, 4, 5, 6, 8],
)
# DataFrame.boxplot()
df = pd.DataFrame(np.random.rand(10, 5), columns=list('ABCDE'))

# Create the figure before plotting.
fig = plt.figure(figsize=(10, 4))

# Options handed to the box plot:
#   sym          -> marker shape for outliers (see matplotlib markers)
#   vert         -> False draws the boxes horizontally
#   whis         -> whisker reach as a multiple of the IQR (default 1.5);
#                   a pair such as [5, 95] pins the whiskers to the 5th and
#                   95th percentiles of the data instead
#   patch_artist -> True fills the area between the quartiles
#   meanline, showmeans -> whether the mean is shown and in what form
#   showbox      -> whether the box is drawn
#   showcaps     -> whether the whisker end caps are drawn
#   showfliers   -> whether outliers are drawn
#   notch        -> whether the box is notched
boxplot_options = {
    'sym': 'o',
    'vert': False,
    'whis': 1.5,
    'patch_artist': True,
    'meanline': False,
    'showmeans': True,
    'showbox': True,
    'showcaps': True,
    'showfliers': True,
    'notch': False,
}

# return_type='dict' yields a dict of artist lists, which the styling code
# below indexes by component name:
#   boxes    -> all boxes
#   medians  -> median lines
#   whiskers -> lines from the box to the error-bar caps
#   fliers   -> outliers
#   caps     -> error-bar end lines
#   means    -> mean markers/lines
f = df.boxplot(return_type='dict', **boxplot_options)
plt.title('boxplot')

# Restyle the returned artists by iterating over each component.
for box in f['boxes']:
    # First the outline colour and width, then the fill colour and opacity.
    box.set(color='b', linewidth=1)
    box.set(facecolor='b', alpha=0.5)

# Remaining components, processed in this order with one style each.
line_styles = {
    'whiskers': {'color': 'k', 'linewidth': 0.5, 'linestyle': '-'},
    'caps': {'color': 'gray', 'linewidth': 2},
    'medians': {'color': 'DarkBlue', 'linewidth': 2},
    'fliers': {'marker': 'o', 'color': 'y', 'alpha': 0.5},
}
for component, style in line_styles.items():
    for artist in f[component]:
        artist.set(**style)
也可以直接使用plt.boxplot()对列表形式的数据绘制箱型图
# Three samples of 100 normally distributed values centred on 0, with
# standard deviations 1, 2 and 3.
data = [np.random.normal(0, sigma, 100) for sigma in (1, 2, 3)]

fig = plt.figure(figsize=(8, 6))

# `data` is a plain list, which has no .boxplot attribute, so the pyplot
# function is used instead of a DataFrame method.
bplot = plt.boxplot(
    data,
    notch=False,
    sym='s',
    vert=True,
    patch_artist=True,
)

# One tick per sample, at positions 1..len(data).
tick_positions = list(range(1, len(data) + 1))
plt.xticks(tick_positions, ['x1', 'x2', 'x3'])
plt.xlabel('x')
plt.title('box plot')

# Paint every returned artist black first ...
for artists in bplot.values():
    for artist in artists:
        artist.set_color('black')

# ... then give each box its own fill colour.
colors = ['pink', 'lightblue', 'lightgreen']
for patch, color in zip(bplot['boxes'], colors):
    patch.set_facecolor(color)
df.boxplot()可以通过by参数进行分组汇总
# Two numeric columns plus two label columns used for grouping.
df = pd.DataFrame(np.random.rand(10, 2), columns=['Col1', 'Col2'])
df['X'] = pd.Series(['A'] * 5 + ['B'] * 5)  # first five rows 'A', last five 'B'
df['Y'] = pd.Series(['A', 'B'] * 5)         # alternating 'A', 'B'
print(df)

# by: column (or list of columns) whose values define the groups.
df.boxplot(by='X')

# column: the data columns to plot, one subplot per column.
df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])