吴恩达机器学习-检测异常服务器

 代码:
 

import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
# Load the exercise data set from the .mat file in the working directory.
mat=sio.loadmat('ex8data1.mat')
print(mat.keys())# keys of interest: X, Xval, yval
X=mat['X']# (307,2) according to the original author's note
Xval,yval=mat['Xval'],mat['yval']# (307,2) and (307,1) according to the original author's note

# Plot the raw training data: first column on the x-axis, second on the y-axis.
plt.plot(X[:,0],X[:,1],'bx')
plt.show()

# 1.获取训练集中样本特征的均值和方差
def estimateGaussianl(X, isCovariance):
    """Estimate the Gaussian parameters of the columns of X.

    Args:
        X: 2-D array with one example per row and one feature per column.
        isCovariance: when truthy, return the full (biased, divided by
            ``len(X)``) covariance matrix; otherwise return the per-feature
            variances as a 1-D array.

    Returns:
        Tuple ``(means, sigma2)`` where ``means`` is the per-feature mean and
        ``sigma2`` is either the variance vector or the covariance matrix.
    """
    means = np.mean(X, axis=0)
    if not isCovariance:
        # Independent-features model: only the per-column variances are needed.
        return means, np.var(X, axis=0)
    centered = X - means
    return means, centered.T @ centered / len(X)

# 2.多元正态分布密度函数
def gaussian(X, means, sigma2):
    """Evaluate a multivariate normal density at every row of X.

    Args:
        X: 2-D array with one example per row.
        means: per-feature mean, broadcast against the rows of X.
        sigma2: either a 1-D array of per-feature variances (expanded to a
            diagonal covariance matrix) or a full covariance matrix.

    Returns:
        Column vector of shape ``(len(X), 1)`` with the density of each row.
    """
    if np.ndim(sigma2) == 1:
        # A variance vector describes a diagonal covariance matrix.
        sigma2 = np.diag(sigma2)
    centered = X - means
    n = centered.shape[1]
    # Normalisation constant (2*pi)^(-n/2) * |Sigma|^(-1/2); a scalar.
    norm_const = np.power(2 * np.pi, -n / 2) * (np.linalg.det(sigma2) ** (-0.5))
    # Row-wise quadratic form x^T Sigma^-1 x. Summing the element-wise product
    # yields exactly the diagonal of centered @ inv(Sigma) @ centered.T without
    # materialising that m-by-m matrix (m is 3600 for the contour grid).
    quad_form = np.sum((centered @ np.linalg.inv(sigma2)) * centered, axis=1)
    p = norm_const * np.exp(-0.5 * quad_form)
    # Callers expect a column vector.
    return p.reshape(-1, 1)

# 3.绘图
def plotGaussian(X, means, sigma2):
    """Plot the data points together with contour lines of the fitted density.

    The density is evaluated on a fixed grid covering [0, 30) in both
    directions with a step of 0.5. Nothing is returned and the figure is not
    shown here; the caller decides when to display it.

    Args:
        X: 2-D array; the first two columns are plotted as blue crosses.
        means: mean passed through to ``gaussian``.
        sigma2: variance vector or covariance matrix passed to ``gaussian``.
    """
    axis_ticks = np.arange(0, 30, 0.5)
    xx, yy = np.meshgrid(axis_ticks, axis_ticks)
    # Flatten the grid into one (x, y) pair per row so it can be scored at once.
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    zz = gaussian(grid_points, means, sigma2).reshape(xx.shape)
    plt.plot(X[:, 0], X[:, 1], 'bx')
    # Contour levels 1e-20, 1e-17, ..., 1e-2.
    contour_levels = [10**exponent for exponent in range(-20, 0, 3)]
    plt.contour(xx, yy, zz, contour_levels)

# Fit per-feature means and variances on the training set, then draw the
# density contours over the data (isCovariance=False selects the variance vector).
means,sigma2=estimateGaussianl(X,isCovariance=False)
plotGaussian(X,means,sigma2)

# 4.选取阈值
def selectThreshold(yval, p):
    """Choose the density threshold that maximises F1 on labelled data.

    An example is flagged as an anomaly when its density is strictly below
    the candidate threshold. 1000 evenly spaced candidates between the
    smallest and largest density are tried; the first one reaching the
    highest F1 score wins.

    NOTE(review): the loop body was corrupted in the source copy (text
    between ``<`` and ``>`` was lost); it is reconstructed here as the usual
    precision/recall/F1 computation - confirm against the original.

    Args:
        yval: ground-truth labels, 1 for anomaly and 0 for normal.
        p: density of each validation example, same number of elements as
            ``yval`` (column vectors and flat arrays are both accepted).

    Returns:
        Tuple ``(bestEpsilon, bestF1)``. Both are 0 if no candidate produces
        a true positive.
    """
    # Flatten both inputs so an (m, 1) / (m,) pair cannot broadcast to (m, m).
    labels = np.ravel(yval)
    density = np.ravel(p)
    bestEpsilon = 0
    bestF1 = 0
    # Candidate thresholds.
    epsilons = np.linspace(density.min(), density.max(), 1000)
    for e in epsilons:
        flagged = density < e
        tp = np.sum(flagged & (labels == 1))
        if tp == 0:
            # Without a true positive F1 is 0 (or undefined); it cannot beat
            # the current best, and skipping avoids a division by zero.
            continue
        fp = np.sum(flagged & (labels == 0))
        fn = np.sum(~flagged & (labels == 1))
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        F1_e = 2 * precision * recall / (precision + recall)
        if F1_e > bestF1:
            bestF1 = F1_e
            bestEpsilon = e
    return bestEpsilon, bestF1

# Fit per-feature means and variances on the training set and print them.
means,sigma2=estimateGaussianl(X,isCovariance=False)
print(means,sigma2)
# Score the validation examples and pick the threshold using their labels.
pval=gaussian(Xval,means,sigma2)
bestEpsilon,bestF1=selectThreshold(yval,pval)

# Find the anomalies: training examples whose density falls below the threshold.
p=gaussian(X,means,sigma2)
# NOTE(review): the source line was truncated after `if p[i]`; comparing with
# bestEpsilon is the assumed completion - confirm against the original script.
# Any statements that followed it are missing from this copy.
# A boolean mask keeps the result 2-D even when no example is flagged.
anoms=X[np.ravel(p)<bestEpsilon]

 结果展示:

吴恩达机器学习-检测异常服务器_第1张图片

 

你可能感兴趣的:(python,机器学习)