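# Two examples: (1) taken from the official scikit-learn website;
# (2) hand-constructed data.
# Finding from the second example: the points DBSCAN treats as anomalies are
# contained in the set the isolation forest (iTree) treats as anomalies.
# In other words, the isolation forest effectively gives a ranking of the
# anomalies; that ordering still needs to be inspected (to be done later).
#
# Example 1: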
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
# Example 1: fit an IsolationForest on two Gaussian clusters, then label
# (a) the training points, (b) fresh points drawn the same way and
# (c) points drawn uniformly from the square [-4, 4) x [-4, 4).
# predict() returns +1 for inliers and -1 for outliers.
rng = np.random.RandomState(42)

# Generate train data: 100 Gaussian points (std 0.3), shifted once by +2 and
# once by -2 on both axes, giving 200 rows in two clusters.
X = 0.3 * rng.randn(100, 2)
X_train = np.r_[X + 2, X - 2]
print("len X_train", len(X_train))

# Generate some regular novel observations (same recipe, 2 x 20 rows).
X = 0.3 * rng.randn(20, 2)
X_test = np.r_[X + 2, X - 2]
print("len X_test", len(X_test))

# Generate some abnormal novel observations.
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
#print("X_outliers:",X_outliers)

# Fit the model.
# NOTE: the former `behaviour='new'` argument is intentionally not passed.
# It was deprecated in scikit-learn 0.22 and removed in 0.24, where passing
# it raises TypeError; from 0.22 on, the behaviour it selected is the default.
clf = IsolationForest(max_samples=100,
                      random_state=rng, contamination='auto')
clf.fit(X_train)

# Training set: show the raw labels, then the rows split by label.
y_pred_train = clf.predict(X_train)
print("y_pred_train:", y_pred_train)
print("1", X_train[y_pred_train == 1])
print("-1", X_train[y_pred_train == -1])
print("###########################")

# Regular novel observations.
y_pred_test = clf.predict(X_test)
print("y_pred_test:", y_pred_test)
print("1", X_test[y_pred_test == 1])
print("-1", X_test[y_pred_test == -1])
print("###########################")

# Abnormal novel observations.
y_pred_outliers = clf.predict(X_outliers)
print("y_pred_outliers:", y_pred_outliers)
print("1", X_outliers[y_pred_outliers == 1])
print("-1", X_outliers[y_pred_outliers == -1])
# Disabled plotting code for the first example. The triple-quoted block below
# is a bare string expression, so none of the statements inside it run.
# Inside it, the code evaluates clf.decision_function on a 50 x 50 grid over
# [-5, 5] x [-5, 5], draws it as filled contours, and overlays X_train (white),
# X_test (green) and X_outliers (red) as scatter points.
"""
# plot the line, the samples, and the nearest vectors to the plane
xx, yy = np.meshgrid(np.linspace(-5, 5, 50), np.linspace(-5, 5, 50))
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.title("IsolationForest")
plt.contourf(xx, yy, Z, cmap=plt.cm.Blues_r)
b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white',
s=20, edgecolor='k')
b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green',
s=20, edgecolor='k')
c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red',
s=20, edgecolor='k')
plt.axis('tight')
plt.xlim((-5, 5))
plt.ylim((-5, 5))
plt.legend([b1, b2, c],
["training observations",
"new regular observations", "new abnormal observations"],
loc="upper left")
plt.show()
"""
# Example 2: hand-constructed data
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
# Example 2: run IsolationForest on a small hand-built data set.
# Each row has three numeric features; 65 rows in total (the last five rows
# repeat the first five).
data = [
    [-2.68420713,1.469732895,2],[-2.71539062,-0.763005825,2],[-2.88981954,-0.618055245,2],[-2.7464372,-1.40005944,2],[-2.72859298,1.50266052,2],
    [-2.27989736,3.365022195,2],[-2.82089068,-0.369470295,2],[-2.62648199,0.766824075,2],[-2.88795857,-2.568591135,2],[-2.67384469,-0.48011265,2],
    [-2.50652679,2.933707545,2],[-2.61314272,0.096842835,2],[-2.78743398,-1.024830855,2],[-3.22520045,-2.264759595,2],[-2.64354322,5.33787705,2],
    [-2.38386932,6.05139453,2],[-2.6225262,3.681403515,2],[-2.64832273,1.436115015,2],[-2.19907796,3.956598405,2],[-2.58734619,2.34213138,2],
    [1.28479459,3.084476355,2],[0.93241075,1.436391405,2],[1.46406132,2.268854235,2],[0.18096721,-3.71521773,2],[1.08713449,0.339256755,2],
    [0.64043675,-1.87795566,2],[1.09522371,1.277510445,2],[-0.75146714,-4.504983795,2],[1.04329778,1.030306095,2],[-0.01019007,-3.242586915,2],
    [-0.5110862,-5.681213775,2],[0.51109806,-0.460278495,2],[0.26233576,-2.46551985,2],[0.98404455,-0.55962189,2],[-0.174864,-1.133170065,2],
    [0.92757294,2.107062945,2],[0.65959279,-1.583893305,2],[0.23454059,-1.493648235,2],[0.94236171,-2.43820017,2],[0.0432464,-2.616702525,2],
    [4.53172698,-0.05329008,2],[30.41407223,-2.58716277,2],[4.61648461,1.538708805,2],[3.97081495,-0.815065605,2],[4.34975798,-0.188471475,2],
    [5.39687992,2.462256225,2],[2.51938325,-5.361082605,2],[4.9320051,1.585696545,2],[4.31967279,-1.104966765,2],[4.91813423,3.511712835,2],
    [3.66193495,1.0891728,2],[111,-0.972695745,2],[4.16537886,0.96876126,2],[3.34459422,-3.493869435,2],[3.5852673,-2.426881725,2],
    [3.90474358,0.534685455,2],[3.94924878,0.18328617,2],[59.48876538,5.27195043,13],[5.79468686,1.139695065,2],[3.29832982,-3.42456273,2],
    [-2.68420713,1.469732895,2],[-2.71539062,-0.763005825,2],[-2.88981954,-0.618055245,2],[-2.7464372,-1.40005944,2],[-2.72859298,1.50266052,2],
]
X_train = np.array(data)
print("X_train:", X_train)

# Fit the model.
# - `behaviour='new'` is intentionally not passed: the argument was deprecated
#   in scikit-learn 0.22 and removed in 0.24, where it raises TypeError.
# - max_samples is capped at the number of rows. A value larger than the data
#   set only makes scikit-learn warn and fall back to n_samples, so the
#   effective sub-sample size is unchanged.
# - random_state is fixed so the flagged set is reproducible between runs,
#   which is needed when comparing against another detector.
clf = IsolationForest(max_samples=min(100, len(X_train)),
                      random_state=42, contamination='auto')
clf.fit(X_train)

# predict() returns +1 for inliers and -1 for outliers; print the raw labels,
# then the rows split by label.
y_pred_train = clf.predict(X_train)
print("y_pred_train:", y_pred_train)
print("1", X_train[y_pred_train == 1])
print("-1", X_train[y_pred_train == -1])
print("###########################")