步骤
代码
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, precision_recall_curve
from sklearn.model_selection import train_test_split
import pdb
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import label_binarize
# Select the 'kaiti' font family for every figure drawn by this module.
# NOTE(review): presumably needed so the Chinese titles and axis labels used
# below render instead of falling back to missing-glyph boxes — confirm the
# font is installed on the target machine.
matplotlib.rc("font", family='kaiti')
def ReadData():
    """Return the iris features padded with random noise columns, plus labels.

    The original feature matrix is extended on the right with
    ``10 * n_features`` columns of standard-normal noise drawn from a
    generator seeded with 0, so the result is reproducible.

    NOTE(review): a later ``def ReadData`` in this module rebinds the name,
    so this version is not the one reached from the script entry point.

    Returns:
        A ``(X, Y)`` tuple: the widened feature matrix and the class labels.
    """
    iris = load_iris()
    features = iris['data']
    labels = iris['target']

    sample_count, feature_count = features.shape
    rng = np.random.RandomState(0)
    noise = rng.randn(sample_count, 10 * feature_count)

    # Column-wise concatenation: original features first, noise after.
    widened = np.c_[features, noise]
    return widened, labels
def Train_LogisticRegression_Model(X, Y):
    """Placeholder for logistic-regression training.

    Not implemented: both arguments are ignored and ``None`` is returned.

    Args:
        X: Feature matrix (unused).
        Y: Target labels (unused).
    """
    return None
def PLOT_PR(recall, precision):
    """Draw a precision-recall (P-R) curve and show the figure.

    Args:
        recall: Recall values, plotted on the x axis.
        precision: Precision values, plotted on the y axis.
    """
    plt.title('P-R曲线')
    # The abbreviation in each label must match the plotted quantity:
    # precision (P) is on the y axis and recall (R) is on the x axis.
    plt.ylabel('查准率(P)', fontsize=10)
    plt.xlabel('查全率(R)', fontsize=10)
    plt.plot(recall, precision)
    plt.show()
def PLOT_ROC(FPR, TPR, ROC_AUC):
    """Draw a ROC curve (TPR against FPR) and show the figure.

    Args:
        FPR: False positive rates, plotted on the x axis.
        TPR: True positive rates, plotted on the y axis.
        ROC_AUC: Area under the curve; shown in the legend when it is not
            ``None``.
    """
    plt.title('TPR-FPR曲线')
    plt.ylabel('真正例率(TPR)', fontsize=10)
    plt.xlabel('假正例率(FPR)', fontsize=10)
    if ROC_AUC is None:
        plt.plot(FPR, TPR)
    else:
        # Surface the AUC the caller computed instead of silently dropping it.
        plt.plot(FPR, TPR, label='AUC = %.2f' % ROC_AUC)
        plt.legend(loc='lower right')
    plt.show()
def ReadData():
    """Load the iris dataset and split it into training and test parts.

    20% of the samples are held out for testing. No random seed is passed to
    the splitter, so the partition differs from call to call.

    Returns:
        A tuple ``(X_train, Y_train, X_test, Y_test)``.
    """
    features, labels = load_iris(return_X_y=True)
    train_x, test_x, train_y, test_y = train_test_split(
        features, labels, test_size=0.2
    )
    # Note the return order: both training arrays first, then both test arrays.
    return train_x, train_y, test_x, test_y
def import_GaussianNB(x_train, y_train, x_test, y_test):
    """Fit a Gaussian naive Bayes classifier and report its test performance.

    Trains on the training split, computes one-vs-rest ROC and
    precision-recall curves for every class on the test split, shows the two
    curves of class index 1, then prints the per-sample correctness mask and
    the overall accuracy.

    Args:
        x_train: Training feature matrix.
        y_train: Training labels; flattened with ``ravel()`` before fitting.
        x_test: Test feature matrix.
        y_test: Test labels; binarized against the fixed class list
            ``[0, 1, 2]``.
    """
    model = GaussianNB()
    model.fit(x_train, y_train.ravel())
    predictions = model.predict(x_test)
    class_scores = model.predict_proba(x_test)

    # One indicator column per class so each class can be scored one-vs-rest.
    onehot_truth = label_binarize(y_test, classes=[0, 1, 2])

    plotted_class = 1
    fpr, tpr, roc_auc = {}, {}, {}
    precision, recall = {}, {}
    for cls in range(class_scores.shape[1]):
        truth_col = onehot_truth[:, cls]
        score_col = class_scores[:, cls]
        precision[cls], recall[cls], _ = precision_recall_curve(truth_col, score_col)
        fpr[cls], tpr[cls], _ = roc_curve(truth_col, score_col)
        roc_auc[cls] = auc(fpr[cls], tpr[cls])

    # Only the selected class is drawn; the remaining curves are computed but unused.
    PLOT_ROC(fpr[plotted_class], tpr[plotted_class], roc_auc[plotted_class])
    PLOT_PR(recall[plotted_class], precision[plotted_class])

    is_correct = predictions == y_test
    print(is_correct)
    accuracy = np.mean(is_correct)
    print('准确度:%.2f%%' % (100 * accuracy))
if __name__ == '__main__':
    # ReadData returns (X_train, Y_train, X_test, Y_test), which is exactly
    # the positional order import_GaussianNB expects.
    splits = ReadData()
    import_GaussianNB(*splits)