import numpy as np
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, ComplementNB, BernoulliNB, CategoricalNB
from sklearn.metrics import precision_score # tp / (tp + fp)
from sklearn.metrics import recall_score # tp / (tp + fn)
from sklearn.metrics import accuracy_score # acc
from sklearn.metrics import auc, roc_curve, roc_auc_score
from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.utils import shuffle
import warnings
warnings.filterwarnings("ignore")
def lr_cls(data, label):
    """Fit a LogisticRegression and evaluate it on its own training data.

    Args:
        data: feature matrix, shape (n_samples, n_features).
        label: target vector, shape (n_samples,).

    Returns:
        (score, acc, precision, recall) where:
          * binary labels  -> precision/recall are scalars for pos_label=1
            (unrounded, matching the historical behavior of this branch);
          * 3+ classes     -> precision/recall are per-class arrays
            (average=None) and all four values are rounded to 4 decimals;
          * fewer than two distinct labels -> None (nothing to classify).
    """
    n_class = len(set(label))
    if n_class < 2:
        # Degenerate label set: no classification task exists.
        return None

    # Fit once; both branches share the same model and predictions.
    cls = LogisticRegression().fit(data, label)
    score = cls.score(data, label)        # mean training accuracy
    predict = cls.predict(data)
    acc = accuracy_score(label, predict)  # identical quantity to `score`

    if n_class == 2:
        print("binary-classification!")
        precision = precision_score(label, predict, pos_label=1)
        recall = recall_score(label, predict, pos_label=1)
        return score, acc, precision, recall

    print("multi-classification!")
    # average=None yields one precision/recall value per class,
    # e.g. [1, 0.979, 0.942]; 'micro'/'macro' would collapse to a scalar.
    precision = np.round(precision_score(label, predict, average=None), 4)
    recall = np.round(recall_score(label, predict, average=None), 4)
    return np.round(score, 4), np.round(acc, 4), precision, recall
def svm_cls(data, label):
    """Train an SVC (default RBF kernel) and score it on the training set.

    Returns (accuracy, precision, recall) computed on `data` itself.
    """
    model = SVC().fit(data, label)
    pred = model.predict(data)
    return (accuracy_score(label, pred),
            precision_score(label, pred),
            recall_score(label, pred))
def bys_cls(data, label):
    """Gaussian naive Bayes classifier, evaluated on its own training data.

    Returns (accuracy, precision, recall).
    """
    pred = GaussianNB().fit(data, label).predict(data)
    # Apply each metric to the same (truth, prediction) pair.
    metrics = [m(label, pred) for m in (accuracy_score, precision_score, recall_score)]
    return tuple(metrics)
def dt_cls(data, label):
    """Decision tree classifier; metrics are computed on the training set.

    Returns (accuracy, precision, recall).
    """
    tree = DecisionTreeClassifier()
    tree.fit(data, label)
    guessed = tree.predict(data)
    acc = accuracy_score(label, guessed)
    return acc, precision_score(label, guessed), recall_score(label, guessed)
def gb_cls(data, label):
    """Gradient-boosted trees; training-set accuracy/precision/recall.

    Returns (accuracy, precision, recall).
    """
    booster = GradientBoostingClassifier().fit(data, label)
    yhat = booster.predict(data)
    scores = (accuracy_score(label, yhat),
              precision_score(label, yhat),
              recall_score(label, yhat))
    return scores
def ab_cls(data, label):
    """AdaBoost ensemble; metrics computed on the training data itself.

    Returns (accuracy, precision, recall).
    """
    predicted = AdaBoostClassifier().fit(data, label).predict(data)
    acc = accuracy_score(label, predicted)
    prec = precision_score(label, predicted)
    rec = recall_score(label, predicted)
    return acc, prec, rec
def rf_cls(data, label):
    """Random forest; training-set accuracy/precision/recall.

    Returns (accuracy, precision, recall).
    """
    forest = RandomForestClassifier()
    forest.fit(data, label)
    out = forest.predict(data)
    return tuple(fn(label, out) for fn in (accuracy_score, precision_score, recall_score))
def auc_cal():
    """Compute ROC-AUC two ways (roc_auc_score vs roc_curve + auc) on two datasets.

    Returns:
        (k1, area1, k2, area2) where
          k1, area1 -- binary AUC on breast-cancer (these two are equal, ~0.9946);
          k2        -- one-vs-rest macro AUC on 3-class iris (~0.9913);
          area2     -- OvR AUC for iris class 2 only (one of the three values
                       averaged into k2: (1 + 0.9862 + 0.9878) / 3).
    """
    # --- Binary case: breast cancer, (569, 30) features, labels {0, 1}.
    x1, y1 = load_breast_cancer(return_X_y=True)
    clf1 = LogisticRegression(solver="liblinear", random_state=0).fit(x1, y1)
    y1_prob = clf1.predict_proba(x1)[:, 1]  # probability of the positive class
    k1 = roc_auc_score(y1, y1_prob)
    fpr1, tpr1, thresh1 = roc_curve(y1, y1_prob, pos_label=1)
    area1 = auc(fpr1, tpr1)  # equals k1 by construction

    # --- Multi-class case: iris, (150, 4) features, 3 classes.
    x2, y2 = load_iris(return_X_y=True)
    clf2 = LogisticRegression(solver="liblinear").fit(x2, y2)
    y2_prob = clf2.predict_proba(x2)  # (150, 3)
    k2 = roc_auc_score(y2, y2_prob, multi_class='ovr')
    # BUG FIX: the original passed the undefined name `y` here (NameError
    # unless a same-named global happened to exist); the iris labels are y2.
    fpr2, tpr2, thresh2 = roc_curve(y2, y2_prob[:, 2], pos_label=2)
    area2 = auc(fpr2, tpr2)
    return k1, area1, k2, area2
if __name__ == "__main__":
    print("Hello")
    # Tiny XOR-style toy set; shuffled purely to demonstrate sklearn.utils.shuffle
    # (neither array is used by the experiments below).
    x_data = np.array([[1.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 0.0]])
    y_data = np.array([0, 1, 1, 0])
    x_shuffle, y_shuffle = shuffle(x_data, y_data, random_state=0)

    # Iris: (150, 4) features, three classes.
    x, y = load_iris(return_X_y=True)
    # Build a synthetic binary task: stack the first two and last two iris
    # feature columns -> (300, 2) samples labelled by which half they came from.
    X = np.concatenate((x[:, :2], x[:, 2:]), axis=0)                       # (300, 2)
    Y = np.concatenate((np.zeros(150, dtype=int), np.ones(150, dtype=int)), axis=0)  # (300,)

    res = lr_cls(x, y)  # multi-class run: precision/recall come back per class
    print("LR score:", res[0], "acc:", res[1], " precision:", res[2], " recall:", res[3])
    res = lr_cls(X, Y)  # binary run: scalar metrics
    print('LR score: %.4f, acc: %.4f, precision: %.4f, recall: %.4f' % (res[0], res[1], res[2], res[3]))

    acc, prec, rec = svm_cls(X, Y)
    print('SVM acc: %.4f, precision: %.4f, recall: %.4f' % (acc, prec, rec))
    acc, prec, rec = bys_cls(X, Y)
    print('BYS acc: %.4f, precision: %.4f, recall: %.4f' % (acc, prec, rec))
    acc, prec, rec = dt_cls(X, Y)
    print('DT acc: %.4f, precision: %.4f, recall: %.4f' % (acc, prec, rec))
    acc, prec, rec = ab_cls(X, Y)
    print('AB acc: %.4f, precision: %.4f, recall: %.4f' % (acc, prec, rec))
    acc, prec, rec = gb_cls(X, Y)
    print('GB acc: %.4f, precision: %.4f, recall: %.4f' % (acc, prec, rec))
    acc, prec, rec = rf_cls(X, Y)
    print('RF acc: %.4f, precision: %.4f, recall: %.4f' % (acc, prec, rec))
# Run results: