机器学习笔记2(使用mnist数据集是否是3,评估逻辑回归优化算法的度量,计算acc,p-r曲线图,roc,auc)

有这样一道题

机器学习笔记2(使用mnist数据集是否是3,评估逻辑回归优化算法的度量,计算acc,p-r曲线图,roc,auc)_第1张图片

这里我们需要对mnist数据集有一定了解。查阅资料并结合上节的实验结果可以知道,用mnist数据集训练逻辑回归算法,得到的结果是多分类的。

机器学习笔记2(使用mnist数据集是否是3,评估逻辑回归优化算法的度量,计算acc,p-r曲线图,roc,auc)_第2张图片

题目要求是:是否是3。意思是我们要把它做成2分类问题:是3的分成一类,不是3的分成另一类。

而关于阈值,Scikit-Learn默认阈值为0.5所以我们不用管,如果需要修改默认阈值可以参考这篇文章https://blog.csdn.net/weixin_43094965/article/details/121298398?ops_request_misc=&request_id=&biz_id=102&utm_term=mnist%E4%BA%8C%E5%88%86%E7%B1%BB%E8%BF%9B%E8%A1%8C%E6%A0%87%E7%AD%BE%E6%A3%80%E6%B5%8B%E4%BB%A3%E7%A0%81&utm_medium=distribute.pc_search_result.none-task-blog-2~all~sobaiduweb~default-0-121298398.142^v51^control_1,201^v3^control_2&spm=1018.2226.3001.4187

所以我们只需要修改上节中的代码为(在这里我们简化了好多东西)

from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import label_binarize
# Load MNIST from OpenML; the digit labels are compared as strings below.
mnist = fetch_openml("mnist_784")
# Hold out half of the samples for testing. A fixed seed keeps the reported
# numbers reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    mnist.data, mnist.target, test_size=0.5, random_state=42
)
# Turn the multi-class digit labels into a binary target: True for "3".
y_train_3 = (y_train == '3')
y_test_3 = (y_test == '3')
# NOTE(review): with default settings the solver may stop before converging on
# unscaled pixel values -- consider scaling the features or raising max_iter.
model = LogisticRegression()
model.fit(X_train, y_train_3)
y_pred = model.predict(X_test)
accuracy_score_value = accuracy_score(y_test_3, y_pred)
# Report recall/precision of the positive class ("is 3"). Macro averaging would
# blend in the much larger "not 3" class and hide how well 3s are detected;
# the per-class and averaged figures are still shown by classification_report.
recall_score_value = recall_score(y_test_3, y_pred, average='binary')
precision_score_value = precision_score(y_test_3, y_pred, average='binary')
classification_report_value = classification_report(y_test_3, y_pred)
print("acc:", accuracy_score_value)
print("rec:", recall_score_value)
print("pre:", precision_score_value)
print(classification_report_value)

即可

输出结果为:

机器学习笔记2(使用mnist数据集是否是3,评估逻辑回归优化算法的度量,计算acc,p-r曲线图,roc,auc)_第3张图片

 接下来我们来绘制P-R图

我们需要用到专业的绘图库matplotlib

如果需要学习该绘图库可以参考这个教程

Matplotlib 教程 | 菜鸟教程 (runoob.com)

在这里我们不赘述,下面的代码是参考多方资料得出的。

直接上代码

from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score,f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import label_binarize
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_auc_score,roc_curve
# Load MNIST from OpenML; the digit labels are compared as strings below.
mnist = fetch_openml("mnist_784")
# Hold out half of the samples for testing. A fixed seed keeps the reported
# numbers reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    mnist.data, mnist.target, test_size=0.5, random_state=42
)
# Turn the multi-class digit labels into a binary target: True for "3".
y_train_3 = (y_train == '3')
y_test_3 = (y_test == '3')
# NOTE(review): with default settings the solver may stop before converging on
# unscaled pixel values -- consider scaling the features or raising max_iter.
model = LogisticRegression()
model.fit(X_train, y_train_3)
y_pred = model.predict(X_test)
accuracy_score_value = accuracy_score(y_test_3, y_pred)
# Report recall/precision of the positive class ("is 3"). Macro averaging would
# blend in the much larger "not 3" class and hide how well 3s are detected;
# the per-class and averaged figures are still shown by classification_report.
recall_score_value = recall_score(y_test_3, y_pred, average='binary')
precision_score_value = precision_score(y_test_3, y_pred, average='binary')
classification_report_value = classification_report(y_test_3, y_pred)
print("acc:", accuracy_score_value)
print("rec:", recall_score_value)
print("pre:", precision_score_value)
print(classification_report_value)
# Following the reference material, obtain a decision score for every training
# sample through 3-fold cross-validation instead of scoring with the fitted model.
from sklearn.model_selection import cross_val_predict
y_scores = cross_val_predict(model, X_train, y_train_3, cv=3, method="decision_function")
# Precision/recall values over the candidate thresholds of those scores.
precisions, recalls, thresholds = precision_recall_curve(y_train_3, y_scores)
def pr(precisions, recalls, thresholds):
    """Draw a precision-recall curve on the current matplotlib axes.

    Args:
        precisions: Precision values, plotted on the y axis.
        recalls: Recall values, plotted on the x axis.
        thresholds: Accepted for call compatibility; not used by the plot.
    """
    axes = plt.gca()
    axes.plot(recalls, precisions, "b-", linewidth=2)
    axes.set_xlabel("Recall", fontsize=16)
    axes.set_ylabel("Precision", fontsize=16)
    # Both quantities are shown on the fixed range [0, 1].
    axes.axis([0, 1, 0, 1])
    axes.grid(True)
# Draw the P-R curve in its own 8x6-inch figure and display it.
pr_figure_size = (8, 6)
plt.figure(figsize=pr_figure_size)
pr(precisions=precisions, recalls=recalls, thresholds=thresholds)
plt.show()

看结果

机器学习笔记2(使用mnist数据集是否是3,评估逻辑回归优化算法的度量,计算acc,p-r曲线图,roc,auc)_第4张图片

 然后绘制roc

from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score,f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import label_binarize
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_auc_score,roc_curve
# Load MNIST from OpenML; the digit labels are compared as strings below.
mnist = fetch_openml("mnist_784")
# Hold out half of the samples for testing. A fixed seed keeps the reported
# numbers reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    mnist.data, mnist.target, test_size=0.5, random_state=42
)
# Turn the multi-class digit labels into a binary target: True for "3".
y_train_3 = (y_train == '3')
y_test_3 = (y_test == '3')
# NOTE(review): with default settings the solver may stop before converging on
# unscaled pixel values -- consider scaling the features or raising max_iter.
model = LogisticRegression()
model.fit(X_train, y_train_3)
y_pred = model.predict(X_test)
accuracy_score_value = accuracy_score(y_test_3, y_pred)
# Report recall/precision of the positive class ("is 3"). Macro averaging would
# blend in the much larger "not 3" class and hide how well 3s are detected;
# the per-class and averaged figures are still shown by classification_report.
recall_score_value = recall_score(y_test_3, y_pred, average='binary')
precision_score_value = precision_score(y_test_3, y_pred, average='binary')
classification_report_value = classification_report(y_test_3, y_pred)
print("acc:", accuracy_score_value)
print("rec:", recall_score_value)
print("pre:", precision_score_value)
print(classification_report_value)
# Obtain a decision score for every training sample through 3-fold
# cross-validation instead of scoring with the fitted model.
from sklearn.model_selection import cross_val_predict
y_scores = cross_val_predict(model, X_train, y_train_3, cv=3, method="decision_function")
# Precision/recall values over the candidate thresholds of those scores.
precisions, recalls, thresholds = precision_recall_curve(y_train_3, y_scores)
def pr(precisions, recalls, thresholds):
    """Draw a precision-recall curve on the current matplotlib axes.

    Args:
        precisions: Precision values, plotted on the y axis.
        recalls: Recall values, plotted on the x axis.
        thresholds: Accepted for call compatibility; not used by the plot.
    """
    axes = plt.gca()
    axes.plot(recalls, precisions, "b-", linewidth=2)
    axes.set_xlabel("Recall", fontsize=16)
    axes.set_ylabel("Precision", fontsize=16)
    # Both quantities are shown on the fixed range [0, 1].
    axes.axis([0, 1, 0, 1])
    axes.grid(True)
# Draw the P-R curve in its own 8x6-inch figure and display it.
pr_figure_size = (8, 6)
plt.figure(figsize=pr_figure_size)
pr(precisions=precisions, recalls=recalls, thresholds=thresholds)
plt.show()
# ROC curve plotting starts here. This rebinds `thresholds`, which the P-R plot
# above has already consumed.
fpr, tpr, thresholds = roc_curve(y_true=y_train_3, y_score=y_scores)
def roc(fpr, tpr, label=None):
    """Draw an ROC curve plus a dashed reference diagonal on the current axes.

    Args:
        fpr: False positive rates, plotted on the x axis.
        tpr: True positive rates, plotted on the y axis.
        label: Optional label attached to the curve's line.
    """
    axes = plt.gca()
    axes.plot(fpr, tpr, linewidth=2, label=label)
    # Dashed black line from (0, 0) to (1, 1) as a visual reference.
    axes.plot([0, 1], [0, 1], 'k--')
    axes.axis([0, 1, 0, 1])
    axes.set_xlabel('False Positive Rate (Fall-Out)', fontsize=16)
    axes.set_ylabel('True Positive Rate (Recall)', fontsize=16)
    axes.grid(True)

# Draw the ROC curve in its own 8x6-inch figure and display it.
roc_figure_size = (8, 6)
plt.figure(figsize=roc_figure_size)
roc(fpr=fpr, tpr=tpr)
plt.show()

运行时需要先关掉第一张图(P-R曲线),第二张图(ROC曲线)才会出现,因为plt.show()会阻塞程序,直到图形窗口被关闭。

机器学习笔记2(使用mnist数据集是否是3,评估逻辑回归优化算法的度量,计算acc,p-r曲线图,roc,auc)_第5张图片

sklearn提供了计算 ROC AUC 的函数roc_auc_score,我们直接调用:

# Area under the ROC curve, from the same labels and scores used for the curve.
auc = roc_auc_score(y_true=y_train_3, y_score=y_scores)
print("auc:", auc)

最后,为了方便大家,完整代码如下:

from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score,f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import label_binarize
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_auc_score,roc_curve
# Load MNIST from OpenML; the digit labels are compared as strings below.
mnist = fetch_openml("mnist_784")
# Hold out half of the samples for testing. A fixed seed keeps the reported
# numbers reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    mnist.data, mnist.target, test_size=0.5, random_state=42
)
# Turn the multi-class digit labels into a binary target: True for "3".
y_train_3 = (y_train == '3')
y_test_3 = (y_test == '3')
# NOTE(review): with default settings the solver may stop before converging on
# unscaled pixel values -- consider scaling the features or raising max_iter.
model = LogisticRegression()
model.fit(X_train, y_train_3)
y_pred = model.predict(X_test)
accuracy_score_value = accuracy_score(y_test_3, y_pred)
# Report recall/precision of the positive class ("is 3"). Macro averaging would
# blend in the much larger "not 3" class and hide how well 3s are detected;
# the per-class and averaged figures are still shown by classification_report.
recall_score_value = recall_score(y_test_3, y_pred, average='binary')
precision_score_value = precision_score(y_test_3, y_pred, average='binary')
classification_report_value = classification_report(y_test_3, y_pred)
print("acc:", accuracy_score_value)
print("rec:", recall_score_value)
print("pre:", precision_score_value)
print(classification_report_value)
# Obtain a decision score for every training sample through 3-fold
# cross-validation instead of scoring with the fitted model.
from sklearn.model_selection import cross_val_predict
y_scores = cross_val_predict(model, X_train, y_train_3, cv=3, method="decision_function")
# Precision/recall values over the candidate thresholds of those scores.
precisions, recalls, thresholds = precision_recall_curve(y_train_3, y_scores)
def pr(precisions, recalls, thresholds):
    """Draw a precision-recall curve on the current matplotlib axes.

    Args:
        precisions: Precision values, plotted on the y axis.
        recalls: Recall values, plotted on the x axis.
        thresholds: Accepted for call compatibility; not used by the plot.
    """
    axes = plt.gca()
    axes.plot(recalls, precisions, "b-", linewidth=2)
    axes.set_xlabel("Recall", fontsize=16)
    axes.set_ylabel("Precision", fontsize=16)
    # Both quantities are shown on the fixed range [0, 1].
    axes.axis([0, 1, 0, 1])
    axes.grid(True)
# Draw the P-R curve in its own 8x6-inch figure and display it.
pr_figure_size = (8, 6)
plt.figure(figsize=pr_figure_size)
pr(precisions=precisions, recalls=recalls, thresholds=thresholds)
plt.show()
# Compute the ROC points. This rebinds `thresholds`, which the P-R plot above
# has already consumed.
fpr, tpr, thresholds = roc_curve(y_true=y_train_3, y_score=y_scores)

def roc(fpr, tpr, label=None):
    """Draw an ROC curve plus a dashed reference diagonal on the current axes.

    Args:
        fpr: False positive rates, plotted on the x axis.
        tpr: True positive rates, plotted on the y axis.
        label: Optional label attached to the curve's line.
    """
    axes = plt.gca()
    axes.plot(fpr, tpr, linewidth=2, label=label)
    # Dashed black line from (0, 0) to (1, 1) as a visual reference.
    axes.plot([0, 1], [0, 1], 'k--')
    axes.axis([0, 1, 0, 1])
    axes.set_xlabel('False Positive Rate (Fall-Out)', fontsize=16)
    axes.set_ylabel('True Positive Rate (Recall)', fontsize=16)
    axes.grid(True)

# Draw the ROC curve in its own 8x6-inch figure and display it.
roc_figure_size = (8, 6)
plt.figure(figsize=roc_figure_size)
roc(fpr=fpr, tpr=tpr)
plt.show()
# Area under the ROC curve, from the same labels and scores used for the curve.
auc = roc_auc_score(y_true=y_train_3, y_score=y_scores)
print("auc:", auc)

auc结果

你可能感兴趣的:(机器学习,算法,逻辑回归)