pyod 模型组合实例

pyod —— 模型组合实例

代码参考地址:https://github.com/yzhao062/pyod/blob/master/examples/comb_example.py

离群值检测由于其无监督的性质,经常受到模型不稳定的困扰。因此,建议将多个检测器的输出组合起来(例如取平均值),以提高结果的鲁棒性。

此演示展示了以下四种分数组合机制(下方代码还额外演示了按中位数组合):

1.平均值:所有检测器的平均分数。

2.最大化:所有检测器的最高分。

3.最大值的平均值(AOM,Average of Maximum):将基本检测器划分为子组,并为每个子组获取最大分数。最终分数是所有子组分数的平均值。

4.平均值的最大值(MOA,Maximum of Average):将基本检测器划分为子组,并获取每个子组的平均分数。最终分数是所有子组分数中的最高分数。

import numpy as np
from sklearn.model_selection import train_test_split
from pyod.models.knn import KNN  # kNN detector
from pyod.models.combination import aom, moa, average, maximization, median
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print
from pyod.utils.utility import standardizer

# Build the demo dataset. With train_only=True a single (X, y) pair is
# returned, which is then split 60/40 into training and test portions.
# NOTE(review): no seed is passed to either call, so results are presumably
# not reproducible across runs -- confirm whether that is intended.
X, y = generate_data(train_only=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

# Standardize the features before fitting the detectors.
X_train_norm, X_test_norm = standardizer(X_train, X_test)

# One kNN base detector per neighbourhood size: k = 10, 20, ..., 200.
k_list = list(range(10, 201, 10))
# Derived from k_list so the score matrices and the loop can never disagree
# about how many detectors there are (20 with the list above).
n_clf = len(k_list)

# Score matrices: one row per sample, one column per base detector.
train_scores = np.zeros([X_train.shape[0], n_clf])
test_scores = np.zeros([X_test.shape[0], n_clf])

for i, k in enumerate(k_list):
    clf = KNN(n_neighbors=k, method='largest')
    clf.fit(X_train_norm)

    # Column i collects the scores of the detector using k neighbours.
    train_scores[:, i] = clf.decision_scores_
    test_scores[:, i] = clf.decision_function(X_test_norm)

# Decision scores have to be normalized before combination, otherwise
# detectors with different score ranges would not be comparable.
train_scores_norm, test_scores_norm = standardizer(train_scores, test_scores)

# Combine the standardized per-detector test scores with each strategy and
# print the evaluation of the combined scores against y_test.

# Combination by average
y_by_average = average(test_scores_norm)
evaluate_print('Combination by Average', y_test, y_by_average)

# Combination by max
y_by_maximization = maximization(test_scores_norm)
evaluate_print('Combination by Maximization', y_test, y_by_maximization)

# Combination by median
# Stored under its own name so the maximization scores above are not
# overwritten.
y_by_median = median(test_scores_norm)
evaluate_print('Combination by Median', y_test, y_by_median)

# Combination by aom (detectors grouped into 5 buckets)
y_by_aom = aom(test_scores_norm, n_buckets=5)
evaluate_print('Combination by AOM', y_test, y_by_aom)

# Combination by moa (detectors grouped into 5 buckets)
y_by_moa = moa(test_scores_norm, n_buckets=5)
evaluate_print('Combination by MOA', y_test, y_by_moa)

你可能感兴趣的:(python)