实验1 贝叶斯分类【机器学习】

步骤

实验1 贝叶斯分类【机器学习】_第1张图片

代码

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, precision_recall_curve
from sklearn.model_selection import train_test_split
import pdb
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import label_binarize

# Select the 'kaiti' font family globally; presumably so the Chinese titles and
# axis labels used by the plotting helpers in this file render correctly.
matplotlib.rcParams["font.family"] = 'kaiti'
def ReadData():
    """Load the iris dataset and pad it with random noise features.

    NOTE: a second ``ReadData`` defined later in this file rebinds the name,
    so this version is not the one executed by the ``__main__`` block.

    Returns:
        tuple: ``(X, Y)`` where ``X`` is the iris feature matrix extended with
        ``10 * n_features`` columns of standard-normal noise (drawn from a
        ``RandomState`` seeded with 0) and ``Y`` is the iris target vector.
    """
    # Load the dataset a single time and reuse the returned bunch.
    iris = load_iris()
    X, Y = iris['data'], iris['target']

    rng = np.random.RandomState(0)
    n_samples, n_features = X.shape  # iris: n_samples == 150, n_features == 4
    noise = rng.randn(n_samples, 10 * n_features)

    return np.c_[X, noise], Y


def Train_LogisticRegression_Model(X, Y):
    """Placeholder for logistic-regression training; not implemented.

    Both arguments are ignored and ``None`` is returned.
    """
    return None


def PLOT_PR(recall, precision):
    """Draw a precision-recall (P-R) curve and display it.

    Args:
        recall: Values plotted on the x axis.
        precision: Values plotted on the y axis (same length as ``recall``).
    """
    plt.title('P-R曲线')
    # Precision (查准率) is abbreviated P and recall (查全率) is abbreviated R,
    # so the y axis (precision) carries "P" and the x axis (recall) carries "R".
    plt.ylabel('查准率(P)', fontsize=10)
    plt.xlabel('查全率(R)', fontsize=10)
    plt.plot(recall, precision)
    plt.show()
def PLOT_ROC(FPR, TPR, ROC_AUC):
    """Draw a ROC curve (TPR against FPR) and display it.

    Args:
        FPR: False-positive rates, plotted on the x axis.
        TPR: True-positive rates, plotted on the y axis.
        ROC_AUC: Area under the curve. When not ``None`` it is shown in the
            legend, formatted to two decimals.
    """
    plt.title('TPR-FPR曲线')
    plt.ylabel('真正例率(TPR)', fontsize=10)  # "true positive rate"
    plt.xlabel('假正例率(FPR)', fontsize=10)
    if ROC_AUC is None:
        plt.plot(FPR, TPR)
    else:
        # Surface the AUC that callers already pass in.
        plt.plot(FPR, TPR, label='AUC = %.2f' % ROC_AUC)
        plt.legend(loc='lower right')
    plt.show()



def ReadData(test_size=0.2, random_state=None):
    """Load the iris dataset and split it into train and test parts.

    Args:
        test_size: Forwarded to ``train_test_split``; defaults to 0.2.
        random_state: Forwarded to ``train_test_split``. The default ``None``
            leaves the split unseeded; pass an int for a reproducible split.

    Returns:
        tuple: ``(X_train, Y_train, X_test, Y_test)`` -- note that this order
        differs from the order ``train_test_split`` itself returns.
    """
    # step 1 read dataset (load once and reuse the returned bunch)
    iris = load_iris()
    X, Y = iris['data'], iris['target']
    # step 2 data division
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state)
    return X_train, Y_train, X_test, Y_test




def import_GaussianNB(x_train, y_train, x_test, y_test, plot_class=1):
    """Train a Gaussian naive Bayes classifier, plot its curves, print accuracy.

    The model is fitted on the training split and evaluated on the test split.
    One-vs-rest ROC and precision-recall curves are computed for every class
    column of ``predict_proba``; the curves of one class are plotted with
    ``PLOT_ROC`` and ``PLOT_PR``. Finally the element-wise correctness vector
    and the accuracy percentage are printed.

    Args:
        x_train: Training feature matrix.
        y_train: Training labels; flattened to 1-D before fitting.
        x_test: Test feature matrix.
        y_test: Test labels; flattened to 1-D before evaluation.
        plot_class: Column index (position in ``clf.classes_``) of the class
            whose curves are plotted. Defaults to 1.

    Returns:
        None
    """
    # Flatten both label arrays. Comparing the 1-D prediction vector with an
    # (n, 1) column would broadcast to an (n, n) matrix and corrupt accuracy.
    y_train = np.asarray(y_train).ravel()
    y_test = np.asarray(y_test).ravel()

    # step 3 build a model
    clf = GaussianNB()
    # step 4 training
    clf.fit(x_train, y_train)
    # step 5 testing
    y_hat1 = clf.predict(x_test)
    y_score = clf.predict_proba(x_test)  # one column per entry of clf.classes_

    # One-vs-rest indicator matrix of shape (n_test, n_classes). It is built
    # from clf.classes_ so that column i always matches y_score[:, i].
    Y_test = (y_test[:, np.newaxis] == clf.classes_).astype(int)

    fpr, tpr, roc_auc = {}, {}, {}
    precision, recall = {}, {}
    for i in range(y_score.shape[1]):
        precision[i], recall[i], _ = precision_recall_curve(Y_test[:, i], y_score[:, i])
        fpr[i], tpr[i], _ = roc_curve(Y_test[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # visualization
    PLOT_ROC(fpr[plot_class], tpr[plot_class], roc_auc[plot_class])
    PLOT_PR(recall[plot_class], precision[plot_class])

    result = y_hat1 == y_test
    print(result)
    acc = np.mean(result)
    print('准确度:%.2f%%' % (100 * acc))

if __name__ == '__main__':
    # ReadData returns (X_train, Y_train, X_test, Y_test), which is exactly the
    # positional order import_GaussianNB takes, so the tuple is unpacked inline.
    import_GaussianNB(*ReadData())

你可能感兴趣的:(机器学习,机器学习,python,sklearn)