使用 sklearn 混淆矩阵分析 Pima 印第安人糖尿病数据

数据链接:FE_pima-indians-diabetes.csv · biabianm/pima-indians-diabetes - Gitee.com

import pandas as pd
from sklearn.model_selection import train_test_split  # 数据分离
from sklearn.linear_model import LogisticRegression  # 逻辑回归模型
from sklearn import metrics  # 准确率评估

# Relative location of the Pima Indians diabetes CSV read by confusion_matrix_test().
path = "../FE_pima-indians-diabetes.csv"


def confusion_matrix_test():
    """
    Evaluate a logistic-regression classifier on the Pima Indians diabetes data.

    Reads the CSV at the module-level ``path``, fits ``LogisticRegression`` on
    four feature columns, then prints the accuracy, the class balance of the
    test split, the confusion matrix, and the metrics derived from it
    (accuracy, error rate, recall, specificity, precision, F1).

    :return: None; every result is printed to stdout.
    """
    pima = pd.read_csv(path)
    print(pima.columns.values)
    # Select the feature matrix x and the label vector y.
    feature_names = ['pregnants', 'serum_insulin', 'BMI', 'Age']
    x = pima[feature_names]
    y = pima['Target']
    # Sanity-check the dimensions.
    print(x.shape)
    print(y.shape)

    # Split into train and test sets (fixed seed for reproducibility).
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

    # Fit the model.
    logreg = LogisticRegression()
    logreg.fit(x_train, y_train)

    # Predict on the held-out test set.
    y_pred = logreg.predict(x_test)

    # Evaluate with plain accuracy.
    print(metrics.accuracy_score(y_test, y_pred))

    # Count the positive and negative samples in the test set.
    print(y_test.value_counts())
    # Share of 1s (the label is treated as 0/1, so the mean is that share).
    print(y_test.mean())
    # Share of 0s.
    print(1 - y_test.mean())
    # Null accuracy: the score obtained by always predicting the majority class.
    print(max(y_test.mean(), 1 - y_test.mean()))

    # Accuracy alone is of limited use, so look at the confusion matrix.
    # It is computed once and reused below instead of being recomputed.
    confusion = metrics.confusion_matrix(y_test, y_pred)
    print(confusion)

    # Show the first 25 true labels next to the first 25 predictions.
    print("true", y_test.values[0:25])
    print("pred", y_pred[0:25])

    # Unpack the four cells (rows = true label, columns = predicted label).
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    TP = confusion[1, 1]
    print(TN, FP, FN, TP)

    # Metrics derived from the confusion matrix.
    # Accuracy, cross-checked against sklearn's own value.
    accuracy = (TP + TN) / (TP + TN + FP + FN)
    print(accuracy)
    print(metrics.accuracy_score(y_test, y_pred))
    # Error rate, cross-checked against 1 - accuracy.
    mis_rate = (FP + FN) / (TP + TN + FP + FN)
    print(mis_rate)
    print(1 - accuracy)
    # Sensitivity (recall): share of actual positives predicted correctly.
    recall = TP / (TP + FN)
    print(recall)
    # Specificity: share of actual negatives predicted correctly.
    specificity = TN / (TN + FP)
    print(specificity)
    # Precision: share of predicted positives that are actually positive.
    precision = TP / (TP + FP)
    print(precision)
    # F1 score: harmonic mean of precision and recall. The denominator is
    # the SUM of the two; dividing by their product always yields 2.
    f1 = 2 * precision * recall / (precision + recall)
    print(f1)


# Script entry point: run the analysis only when this file is executed directly.
if __name__ == "__main__":
    confusion_matrix_test()

你可能感兴趣的:(机器学习,python,数据挖掘,sklearn,机器学习,python,开发语言)