import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
def anomalyDetection_example():
    """Run the anomaly-detection demo on ``ex8data1.mat``.

    Loads the ``X``, ``Xval`` and ``yval`` arrays from the .mat file, fits a
    Gaussian to ``X``, plots the data with density contours, selects the
    threshold that maximizes F1 on the validation set, and circles in red
    every training sample whose density falls below that threshold.
    """
    data = loadmat('ex8data1.mat')
    X = data['X']
    Xval = data['Xval']
    yval = data['yval']

    # Fit the Gaussian once; the same parameters score both data sets.
    mu, sigma2 = estimateGaussian(X)
    p = multi_gaussian(X, mu, sigma2)
    visualizeFit(X, mu, sigma2)

    pval = multi_gaussian(Xval, mu, sigma2)
    bestF1, bestepsilon = select_threshold(yval, pval)

    # Mark the points whose density is below the selected threshold.
    # ``p`` is a column vector, so flatten it before np.where: indexing X
    # with the raw (rows, cols) tuple would also pick up row 0 through the
    # all-zero column indices and wrongly circle the first sample.
    outliers = np.where(np.ravel(p) < bestepsilon)[0]
    plt.scatter(X[outliers, 0], X[outliers, 1], marker='o', facecolors='none', edgecolors='red')
    plt.show()
def estimateGaussian(X):
    """Estimate the mean and covariance matrix of the rows of ``X``.

    Args:
        X: 2-D array with one sample per row and one feature per column.

    Returns:
        Tuple ``(mu, sigma2)``: ``mu`` is the per-column mean (length n) and
        ``sigma2`` is the full (n, n) covariance matrix, normalized by the
        number of samples ``m`` (not ``m - 1``).
    """
    m, _ = X.shape
    # Mean of every feature.
    mu = np.mean(X, axis=0)
    # Covariance matrix of the mean-centered data.
    centered = X - mu
    sigma2 = centered.T.dot(centered) / m
    return mu, sigma2
def multi_gaussian(X, mu, sigma2):
    """Evaluate a multivariate Gaussian density at every row of ``X``.

    Args:
        X: 2-D array of shape (m, n), one point per row.
        mu: Mean of the distribution, n values (flattened before use).
        sigma2: Either an (n, n) covariance matrix or a 1-D sequence of n
            variances, which is expanded to a diagonal covariance matrix.

    Returns:
        Array of shape (m, 1) holding the density at each row of ``X``.

    Raises:
        numpy.linalg.LinAlgError: If the covariance matrix is singular.
    """
    m, n = X.shape
    if np.ndim(sigma2) == 1:
        sigma2 = np.diag(sigma2)
    mu = np.ravel(mu)

    norm_const = 1 / (np.power(2 * np.pi, n / 2) * np.sqrt(np.linalg.det(sigma2)))

    # Solve sigma2 * y = (x - mu) for all rows at once instead of inverting
    # the covariance matrix again for every single sample.
    diff = X - mu
    solved = np.linalg.solve(sigma2, diff.T).T
    # Row-wise quadratic form (x - mu)^T sigma2^{-1} (x - mu).
    mahalanobis_sq = np.sum(diff * solved, axis=1)

    return (norm_const * np.exp(-0.5 * mahalanobis_sq)).reshape(m, 1)
def select_threshold(yval, pval, num_steps=10000):
    """Pick the density threshold that maximizes F1 on a validation set.

    Candidate thresholds are ``num_steps`` evenly spaced values between the
    smallest and largest entry of ``pval``. A sample is predicted anomalous
    when its density is less than or equal to the candidate threshold.

    Args:
        yval: Ground-truth labels (1 = anomaly, 0 = normal), as a 1-D array
            or a column vector.
        pval: Densities of the validation samples, same number of entries
            as ``yval``.
        num_steps: Number of candidate thresholds to try.

    Returns:
        Tuple ``(bestF1, bestepsilon)``. Both are 0 when no candidate yields
        a true positive. On ties the smallest threshold wins.

    Raises:
        ValueError: If ``pval`` is empty (its min/max cannot be taken).
    """
    bestF1 = 0
    bestepsilon = 0

    scores = np.ravel(pval)
    labels = np.ravel(yval)
    is_anomaly = labels == 1
    is_normal = labels == 0

    for epsilon in np.linspace(np.min(scores), np.max(scores), num_steps):
        flagged = scores <= epsilon
        tp = np.sum(flagged & is_anomaly)
        if tp == 0:
            # Precision/recall are 0 or undefined (0/0) here, so F1 can
            # never beat the current best; skip instead of dividing by zero.
            continue
        fp = np.sum(flagged & is_normal)
        fn = np.sum(~flagged & is_anomaly)

        prec = tp / (tp + fp)
        rec = tp / (tp + fn)
        F1 = 2 * prec * rec / (prec + rec)
        if F1 > bestF1:
            bestF1 = F1
            bestepsilon = epsilon
    return bestF1, bestepsilon
def plot_data(X):
    """Scatter-plot the first two columns of ``X`` on a new figure.

    Args:
        X: 2-D array; column 0 is drawn on the x-axis (labelled latency)
            and column 1 on the y-axis (labelled throughput).

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can keep drawing on
        the figure or call ``show()``.
    """
    plt.figure()
    plt.scatter(X[:, 0], X[:, 1], c='red', marker='+')
    plt.xlabel('Latency (ms)')
    # The original label was missing its closing parenthesis.
    plt.ylabel('Throughput (mb/s)')
    return plt
# Original author's note: "don't know how to do this".
def visualizeFit(X, mu, sigma2):
    """Plot the data points and contour lines of the fitted Gaussian.

    The density is evaluated with ``multi_gaussian`` on a fixed grid that
    covers 0 to 36 (exclusive) in steps of 0.5 on both axes.

    Args:
        X: 2-D array; the first two columns are plotted as blue crosses.
        mu: Mean of the Gaussian, passed through to ``multi_gaussian``.
        sigma2: Covariance matrix or variance vector, passed through to
            ``multi_gaussian``.
    """
    x = np.arange(0, 36, 0.5)
    y = np.arange(0, 36, 0.5)
    X1, X2 = np.meshgrid(x, y)

    grid_points = np.hstack((X1.reshape(-1, 1), X2.reshape(-1, 1)))
    Z = multi_gaussian(grid_points, mu, sigma2).reshape(X1.shape)

    plt.plot(X[:, 0], X[:, 1], 'bx')

    # Contours are drawn only when every density value is finite.
    if not np.isinf(Z).any():
        # Levels 1e-20, 1e-17, ..., 1e-2. Uses the builtin float because the
        # np.float alias was removed from NumPy and raises AttributeError.
        lvls = 10 ** np.arange(-20, 0, 3).astype(float)
        plt.contour(X1, X2, Z, levels=lvls, colors='black', linewidths=0.7)
if __name__ == "__main__":
    # Run the demo only when executed as a script, so that importing this
    # module does not load data files or open plot windows.
    anomalyDetection_example()