# coding=utf-8
# 朴素贝叶斯方法
import sklearn
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, precision_score, recall_score, auc,accuracy_score,roc_auc_score
from sklearn.metrics import zero_one_loss,log_loss
from sklearn.naive_bayes import GaussianNB
def plot_roc(name, labels, predictions, **kwargs):
    """Plot a ROC curve, save the figure to disk and return its AUC.

    Class ``0`` is treated as the positive class (``pos_label=0``), so
    ``predictions`` must be scores for which a larger value means
    "more likely class 0".

    Args:
        name: Legend label for the plotted curve.
        labels: Ground-truth class label of each sample.
        predictions: Score of each sample (higher = more likely class 0).
        **kwargs: Forwarded unchanged to ``plt.plot`` (e.g. ``color``).

    Returns:
        Area under the ROC curve, computed from the raw (0..1) false- and
        true-positive rates returned by ``roc_curve``.
    """
    import os

    fp, tp, _ = sklearn.metrics.roc_curve(labels, predictions, pos_label=0)

    # Rates are scaled to percentages for display only.
    plt.plot(100 * fp, 100 * tp, label=name, linewidth=2, **kwargs)
    plt.xlabel('False positives [%]')
    plt.ylabel('True positives [%]')
    plt.xlim([-0.5, 100.5])
    plt.ylim([-0.5, 100.5])
    plt.grid(True)
    ax = plt.gca()
    ax.set_aspect('equal')
    plt.legend(loc='lower right')

    # savefig does not create missing directories, so make sure it exists.
    out_path = "./img/binary_roc.png"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path)

    # Use the module-qualified function: the script part of this file
    # rebinds the bare name ``auc`` to a float, which would break any
    # later call made through the bare name.
    return sklearn.metrics.auc(fp, tp)
def PrepareData(path='../dataset/dataset_supplement.npz'):
    """Load the dataset and split it into train / validation / test sets.

    The ``.npz`` archive must contain an array under the key ``'x'`` whose
    last column is the class label and whose remaining columns are the
    features. The split is stratified on the label with proportions
    60% / 20% / 20%.

    Args:
        path: Location of the ``.npz`` archive to load.

    Returns:
        Tuple ``(x_train, y_train, x_valid, y_valid, x_test, y_test)``.
    """
    # The context manager closes the archive even if reading fails, and the
    # array is read from the archive only once.
    with np.load(path) as archive:
        data = archive['x']
    x, y = data[:, :-1], data[:, -1]
    print(x.shape)
    y = y.astype(np.int16)

    # NOTE(review): earlier commented-out code referred to a relabelled
    # variant, '../dataset/dataset_two_supplement.npz', storing features and
    # labels under separate keys 'x' and 'y'. That layout is not handled here.

    # Build the training, validation and test sets (60% : 20% : 20%).
    # First hold out 20% of all samples as the test set ...
    x_train, x_test, y_train, y_test = train_test_split(
        x, y,
        test_size=0.2,
        shuffle=True,
        stratify=y,  # keep the label proportions of y in both parts
    )
    # ... then 25% of the remaining 80% (= 20% of the total) for validation.
    x_train, x_valid, y_train, y_valid = train_test_split(
        x_train, y_train,
        test_size=0.25,
        shuffle=True,
        stratify=y_train,
    )
    return x_train, y_train, x_valid, y_valid, x_test, y_test
from sklearn import metrics

# ---------------------------------------------------------------------------
# Train a Gaussian Naive Bayes classifier and evaluate it on the test split.
# ---------------------------------------------------------------------------
x_train, y_train, x_valid, y_valid, x_test, y_test = PrepareData()

model = GaussianNB()
model = model.fit(x_train, y_train)

# Class-membership probabilities; columns are ordered like model.classes_.
y_pred = model.predict_proba(x_test)
# argmax yields a column index, not a label, so map it through classes_.
# This is identical to the plain index when the labels are exactly 0..K-1.
test_predictions = model.classes_[np.argmax(y_pred, axis=1)]
y_true = y_test.astype('uint8')

# Crude ROC score: min-max normalise the predicted label and invert it so
# that the lowest label (class 0, the positive class in plot_roc) scores
# highest. Guard the degenerate case where every prediction is the same
# class, which would otherwise yield 0/0 = NaN.
pred_min = test_predictions.min()
pred_span = test_predictions.max() - pred_min
if pred_span == 0:
    test_scores = np.ones(test_predictions.shape, dtype=float)
else:
    test_scores = 1 - (test_predictions - pred_min) / pred_span

mpl.rcParams['figure.figsize'] = (12, 10)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
# Do not assign the result to ``auc``: that would shadow the function
# imported from sklearn.metrics, which plot_roc relies on.
roc_curve_auc = plot_roc("My Model", y_true, test_scores, color=colors[0])

loss = zero_one_loss(y_true, test_predictions)
precision = precision_score(y_true, test_predictions, average='weighted')
# roc_auc_score expects a 1-D score (probability of the greater label) for
# two-class problems and the full probability matrix for multi-class ones.
if y_pred.shape[1] == 2:
    auc_score = roc_auc_score(y_true, y_pred[:, 1])
else:
    auc_score = roc_auc_score(y_true, y_pred, multi_class='ovr')

print('loss: ', loss)
print('precision: ', precision)
print('auc: ', auc_score)
print(metrics.classification_report(y_true, test_predictions, digits=4))
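# NOTE: computing the evaluation metrics from the x_scores-style scores is
# inaccurate; only precision and accuracy are reasonably accurate that way.
# Prefer classification_report for the metrics, and the method used in
# plot_roc for the AUC.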