箱型图判断异常值

import pandas as pd
# Absolute path to the catering sales workbook used throughout this script.
catering_sale = 'F:/python/python数据分析与挖掘实战/图书配套数据、代码/chapter3/demo/data/catering_sale.xls'
# Load the sheet and use the date column as the row index.
data = pd.read_excel(catering_sale, index_col=u'日期')

# First look at the data: first rows, last rows, row count, summary
# statistics, the index and the column labels, printed in that order.
for overview in (
    data.head(),
    data.tail(),
    len(data),
    data.describe(),
    data.index,
    data.columns,
):
    print(overview)

# Optional exploratory checks, left disabled.
# Unique values
#print(data['销量'].is_unique)
#print(data['销量'].unique())
#print(len(data['销量'].unique()))
#print(data['销量'].value_counts())
# Missing values
#print(data.isnull())
#print(data.dropna())
#print(data.fillna(0))
# Duplicated rows
#print(data.duplicated())
#print(data.drop_duplicates())
# Sorting
#print(data.sort_values(by='销量'))

import matplotlib.pyplot as plt
# Select a font that can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Keep the minus sign displaying correctly.
plt.rcParams['axes.unicode_minus'] = False

plt.figure()
# return_type='dict' gives access to the drawn artists; p['fliers'] holds the
# outlier markers and only its first entry is annotated here.
p = data.boxplot(return_type='dict')
fliers = p['fliers'][0]
x = fliers.get_xdata()
# Sort a copy so the array handed back by the artist is left untouched.
y = fliers.get_ydata().copy()
y.sort()

# Label every outlier with its value. Labels are pushed horizontally by an
# amount that depends on the gap to the previous (smaller) outlier, so values
# that sit close together do not print on top of each other.
for i, (xi, yi) in enumerate(zip(x, y)):
    gap = yi - y[i - 1] if i > 0 else 0
    if gap != 0:
        x_text = xi + 0.05 - 0.8 / gap
    else:
        # First outlier, or a value identical to the previous one: use the
        # fixed offset instead of dividing by a zero gap.
        x_text = xi + 0.08
    plt.annotate(yi, xy=(xi, yi), xytext=(x_text, yi))
plt.show()


# Simpler alternative (disabled): write each value directly at its marker.
# for xx,yy in zip(x,y):
#     plt.text(xx,yy,str(yy))

In [129]: data.head(10)
Out[129]: 
                销量
日期                
2015-03-01    51.0
2015-02-28  2618.2
2015-02-27  2608.4
2015-02-26  2651.9
2015-02-25  3442.1
2015-02-24  3393.1
2015-02-23  3136.6
2015-02-22  3744.1
2015-02-21  6607.4
2015-02-20  4060.3

箱型图判断异常值_第1张图片

你可能感兴趣的:(python数据挖掘)