ex8: anomaly detection

import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat

def anomalyDetection_example():
    """Run the anomaly-detection demo on the ``ex8data1.mat`` dataset.

    Loads the training set ``X`` and the labelled validation set
    (``Xval``, ``yval``) from the .mat file, fits a Gaussian to ``X``,
    draws the data with the density contours, selects the threshold with
    the best F1 score on the validation set, and circles in red the
    training points whose density falls below that threshold.
    """
    data = loadmat('ex8data1.mat')
    X = data['X']
    Xval = data['Xval']
    yval = data['yval']

    # The Gaussian is fitted once on the training set and reused for both
    # the training and the validation densities.
    mu, sigma2 = estimateGaussian(X)
    p = multi_gaussian(X, mu, sigma2)

    visualizeFit(X, mu, sigma2)

    pval = multi_gaussian(Xval, mu, sigma2)
    _best_f1, bestepsilon = select_threshold(yval, pval)

    # Mark the points whose density is below the threshold.
    # The densities come back as an (m, 1) column; flatten them so that
    # np.where yields one array of row indices. Indexing X with the raw
    # (rows, cols) tuple would also select row 0 for every outlier.
    outliers = np.where(np.ravel(p) < bestepsilon)[0]
    plt.scatter(X[outliers, 0], X[outliers, 1], marker='o', facecolors='none', edgecolors='red')
    plt.show()

def estimateGaussian(X):
    """Estimate the parameters of a multivariate Gaussian from data.

    Args:
        X: 2-D array of shape (m, n), one example per row.

    Returns:
        Tuple ``(mu, sigma2)``: ``mu`` is the per-feature mean with shape
        (n,), and ``sigma2`` is the (n, n) covariance matrix normalised by
        ``m`` (not ``m - 1``).
    """
    m, _ = X.shape  # unpacking also rejects input that is not 2-D
    # Mean of every feature (column).
    mu = np.mean(X, axis=0)
    # Covariance matrix of the mean-centred data.
    centered = X - mu
    sigma2 = centered.T.dot(centered) / m
    return mu, sigma2

def multi_gaussian(X, mu, sigma2):
    """Evaluate the multivariate Gaussian density at every row of ``X``.

    Args:
        X: array of shape (m, n), one point per row.
        mu: mean vector with n entries (shape (n,) or (1, n)).
        sigma2: either an (n, n) covariance matrix, or a 1-D array of
            per-feature variances which is expanded to a diagonal matrix.

    Returns:
        Array of shape (m, 1) holding the density of each row of ``X``.
    """
    m, n = X.shape
    if np.ndim(sigma2) == 1:
        sigma2 = np.diag(sigma2)

    # Normalisation constant 1 / ((2*pi)^(n/2) * sqrt(det(Sigma))).
    norm_const = 1.0 / (np.power(2 * np.pi, n / 2) * np.sqrt(np.linalg.det(sigma2)))

    # The inverse does not depend on the row, so compute it once.
    sigma_inv = np.linalg.inv(sigma2)

    # Row-wise quadratic form (x - mu)^T Sigma^-1 (x - mu) for all rows.
    diff = X - np.ravel(mu)
    quad = np.sum(diff.dot(sigma_inv) * diff, axis=1)

    return (norm_const * np.exp(-0.5 * quad)).reshape(m, 1)

def select_threshold(yval, pval, num_steps=10000):
    """Pick the density threshold that maximises F1 on a validation set.

    A point is predicted anomalous when its density is strictly below the
    threshold (``pval < epsilon``), which is the same rule the threshold is
    later applied with.

    Args:
        yval: ground-truth labels, 1 for anomaly and 0 for normal; shape
            (m, 1) or (m,).
        pval: density of each validation point; same number of entries as
            ``yval``.
        num_steps: number of evenly spaced candidate thresholds tried
            between ``min(pval)`` and ``max(pval)``.

    Returns:
        Tuple ``(bestF1, bestepsilon)``. Both are 0 when no candidate
        threshold yields a true positive.
    """
    labels = np.ravel(yval)
    probs = np.ravel(pval)
    is_anomaly = labels == 1
    is_normal = labels == 0

    bestF1 = 0
    bestepsilon = 0
    for epsilon in np.linspace(np.min(probs), np.max(probs), num_steps):
        flagged = probs < epsilon
        tp = float(np.sum(flagged & is_anomaly))
        if tp == 0:
            # Without true positives F1 is 0 (or 0/0), so it can never
            # beat the current best; skipping avoids dividing by zero.
            continue
        fp = float(np.sum(flagged & is_normal))
        fn = float(np.sum(~flagged & is_anomaly))
        prec = tp / (tp + fp)
        rec = tp / (tp + fn)
        F1 = 2 * prec * rec / (prec + rec)
        if F1 > bestF1:
            bestF1 = F1
            bestepsilon = epsilon
    return bestF1, bestepsilon

def plot_data(X):
    """Scatter-plot the first two columns of ``X`` on a new figure.

    Args:
        X: 2-D array; column 0 is drawn on the x axis (labelled latency)
            and column 1 on the y axis (labelled throughput).

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can keep drawing on
        the figure or show it.
    """
    plt.figure()
    plt.scatter(X[:, 0], X[:, 1], c='red', marker='+')
    plt.xlabel('Latency (ms)')
    # The original label was missing its closing parenthesis.
    plt.ylabel('Throughput (mb/s)')
    return plt

# Author's note: I don't know how this part works.
def visualizeFit(X, mu, sigma2):
    """Plot the data set together with contours of the fitted Gaussian.

    The density is evaluated on a fixed grid covering [0, 36) in both
    dimensions with a step of 0.5, and contour lines are drawn at
    logarithmically spaced density levels.

    Args:
        X: 2-D array; its first two columns are plotted as blue crosses.
        mu: mean of the Gaussian, passed through to ``multi_gaussian``.
        sigma2: covariance (or variances) of the Gaussian, passed through
            to ``multi_gaussian``.
    """
    x = np.arange(0, 36, 0.5)
    y = np.arange(0, 36, 0.5)
    X1, X2 = np.meshgrid(x, y)

    # Evaluate the density at every grid node, then restore the grid shape.
    grid_points = np.hstack((X1.reshape(-1, 1), X2.reshape(-1, 1)))
    Z = multi_gaussian(grid_points, mu, sigma2).reshape(X1.shape)

    plt.plot(X[:, 0], X[:, 1], 'bx')

    # Contours are only drawn when the density is finite everywhere.
    if not np.isinf(Z).any():
        # Levels 1e-20, 1e-17, ..., 1e-2. The builtin float replaces the
        # np.float alias, which was removed from NumPy.
        lvls = 10.0 ** np.arange(-20, 0, 3, dtype=float)
        plt.contour(X1, X2, Z, levels=lvls, colors='black', linewidths=0.7)

# Run the demo. There is no ``__main__`` guard, so this call also executes
# whenever the module is imported.
anomalyDetection_example()

ex8: anomaly detection_第1张图片

你可能感兴趣的:(吴恩达机器学习)