import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
# Reproducible sample: 10,000 standard-normal draws scaled by 100.
np.random.seed(12345)
data = pd.Series(np.random.randn(10000) * 100)

# ---------------------------------------------------------------------------
# Normality test
# ---------------------------------------------------------------------------
# Kolmogorov-Smirnov test of the sample against a normal distribution whose
# location/scale are the sample's own mean and standard deviation.
u = data.mean()
std = data.std()
ks_result = stats.kstest(data, 'norm', args=(u, std))
print(ks_result)

# ---------------------------------------------------------------------------
# Kernel density estimate with sigma reference lines (top panel)
# ---------------------------------------------------------------------------
fig = plt.figure(figsize=(15, 9))
ax1 = fig.add_subplot(2, 1, 1)
# No explicit ``ax`` is passed, so pandas draws on the current axes.
data.plot(kind='kde', color='k', ylim=[0, 0.005], label='kde line')

# Dashed vertical lines at mean +/- k*std for each (k, colour) pair.
sigma_bands = ((1, 'b'), (2, 'g'), (3, 'r'), (6, 'lime'))
for n_sigma, band_color in sigma_bands:
    offset = n_sigma * std
    # Only the lower line is labelled, so each band shows up once in the legend.
    plt.axvline(u - offset, color=band_color, linestyle='--',
                label='{} sigma'.format(n_sigma))
    plt.axvline(u + offset, color=band_color, linestyle='--')
# Text annotation at a fixed data position (x=50, y=0).
plt.text(50, 0, "1 sigma", fontdict={'fontsize': 15, 'color': 'b'})
ax1.legend()

# ---------------------------------------------------------------------------
# Split the sample into inliers and outliers (3-sigma rule)
# ---------------------------------------------------------------------------
# The band is mean +/- 3*std; a tighter multiplier such as 2.7 can be
# substituted here to flag more points.
left = u - 3 * std
right = u + 3 * std
below_band = data < left
above_band = data > right
error = data[below_band | above_band]
data_c = data[(data >= left) & (data <= right)]
print('Error:{}'.format(len(error)))

# ---------------------------------------------------------------------------
# Scatter plot highlighting the outliers (bottom panel)
# ---------------------------------------------------------------------------
ax2 = fig.add_subplot(2, 1, 2)
scatter_layers = (
    (data_c, 'g', 0.6, 'normal'),
    (error, 'r', 0.8, 'outliers'),
)
for points, point_color, opacity, legend_tag in scatter_layers:
    plt.scatter(points.index, points.values,
                color=point_color, alpha=opacity, label=legend_tag)
ax2.set_xlabel('Index')
ax2.set_ylabel('Value')
ax2.legend()
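# Note: this article is part of a set of Python study notes and is not written
# for profit. Typing everything out by hand takes effort, so if these notes
# helped you, a like or a bookmark would be greatly appreciated. If anything
# here infringes on your rights, please contact the author to have it removed.
# Thank you for your cooperation!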