箱形图又称为盒须图、盒式图或箱线图,主要用于反映原始数据的分布特征,也可用于比较多组数据的分布特征。
箱形图的绘制步骤:
箱形图着色:
# Box-plot colouring demo: 10 random rows in five columns, A through E.
df = pd.DataFrame(
    np.random.rand(10, 5),
    columns=['A', 'B', 'C', 'D', 'E'],
)

# One colour per box-plot component; the keys are the component names
# accepted by the ``color`` argument of ``DataFrame.plot.box``.
color = {
    'boxes': 'DarkGreen',
    'whiskers': 'DarkOrange',
    'medians': 'DarkBlue',
    'caps': 'Gray',
}

# ``positions`` places the five boxes at explicit axis coordinates, and
# ``ylim`` fixes the value-axis range.
df.plot.box(
    ylim=[0, 1.2],
    color=color,
    positions=[1, 4, 5, 6, 8],
)

# vert=False: do not draw the boxes vertically.
df.plot.box(vert=False)
plt.boxplot 的函数签名(仅作参数参考,不能直接运行;在较新的 Matplotlib 中,hold 参数已被移除,manage_xticks 已更名为 manage_ticks,请以所用版本的官方文档为准):
plt.boxplot(x, notch=None, sym=None, vert=None, whis=None, positions=None, widths=None, patch_artist=None, bootstrap=None, usermedians=None, conf_intervals=None, meanline=None, showmeans=None, showcaps=None, showbox=None, showfliers=None, boxprops=None, labels=None, flierprops=None, medianprops=None, meanprops=None, capprops=None, whiskerprops=None, manage_xticks=True, autorange=False, zorder=None, hold=None, data=None)
# Draw a fully customised box plot and keep the created artists in ``f``.
df = pd.DataFrame(
    np.random.rand(10, 5),
    columns=['A', 'B', 'C', 'D', 'E'],
)
plt.figure(figsize=(10, 4))
f = df.boxplot(
    # --- geometry ---
    vert=True,            # draw the boxes vertically
    notch=False,          # whether the boxes are notched
    whis=1.5,             # whisker setting (value passed through unchanged)
    # --- which elements are drawn ---
    showbox=True,         # show the box; if hidden, its fill is hidden too
    showcaps=True,        # show the caps at the ends of the whiskers
    showfliers=True,      # show the outliers
    showmeans=True,       # show the arithmetic mean
    meanline=True,        # with showmeans=True, draw the mean as a line
                          # parallel to the median line
    # --- appearance ---
    patch_artist=True,    # fill the boxes with colour
    sym='o',              # outlier symbol, see the marker reference
    # --- return value ---
    return_type='dict',   # return the artists as a dict
)
返回的字典 f 中各键对应的图形元素:
boxes:箱体
medians:中位线
means:均值线
whiskers:须线(从箱体两端延伸出的线)
caps:须线末端的边缘线
fliers:异常值
# Restyle each artist group of the box plot. ``f`` is the dict returned by
# the earlier ``df.boxplot(..., return_type='dict')`` call.
# print(f.keys())
# dict_keys(['boxes', 'fliers', 'medians', 'caps', 'whiskers', 'means'])

# Boxes: edge and fill colours. Setting ``facecolor`` assumes the boxes
# were drawn with patch_artist=True, as in the call that produced ``f``.
for box in f['boxes']:
    box.set(edgecolor='r', facecolor='b', linewidth=1, alpha=0.5)

# Median lines.
for median in f['medians']:
    median.set(color='DarkBlue', linewidth=2)

# Mean lines.
for mean in f['means']:
    mean.set(color='y', linewidth=2)

# Whiskers.
for whisker in f['whiskers']:
    whisker.set(color='g', linewidth=0.5, linestyle='-')

# Caps at the ends of the whiskers.
for cap in f['caps']:
    cap.set(color='gray', linewidth=2)

# Outliers. NOTE(review): open question from the original author - does
# ``color`` here take the box colour? Not verified.
for flier in f['fliers']:
    flier.set(marker='o', alpha=0.5)
# Grouped box plots: two numeric columns plus two label columns (X and Y)
# that serve as grouping keys.
df = pd.DataFrame(np.random.rand(6, 2), columns=['Col1', 'Col2'])
df['X'] = pd.Series(list('AAABBB'))
df['Y'] = pd.Series(list('ABBBAA'))
'''
Col1 Col2 X Y
0 0.134612 0.451295 A A
1 0.131603 0.922581 A B
2 0.532679 0.247002 A B
3 0.863306 0.736274 B B
4 0.999598 0.052141 B A
5 0.292755 0.493295 B A
'''
# Group by the values of X.
df.boxplot(by='X')
# Only Col2, grouped by each (X, Y) combination.
df.boxplot(column=['Col2'], by=['X', 'Y'])