有这样一道题
这里我们需要对 MNIST 数据集有一定了解。查阅资料并结合上节的实验结果可知,直接用逻辑回归算法训练 MNIST 数据集,得到的是一个多分类模型(数字 0~9 共 10 类);
而题目的要求是判断"是否是 3",也就是说我们要把它改成二分类问题:是 3 的样本分成一类(正类),不是 3 的样本分成另一类(负类)。
而关于阈值,Scikit-Learn
默认阈值为0.5所以我们不用管,如果需要修改默认阈值可以参考这篇文章https://blog.csdn.net/weixin_43094965/article/details/121298398?ops_request_misc=&request_id=&biz_id=102&utm_term=mnist%E4%BA%8C%E5%88%86%E7%B1%BB%E8%BF%9B%E8%A1%8C%E6%A0%87%E7%AD%BE%E6%A3%80%E6%B5%8B%E4%BB%A3%E7%A0%81&utm_medium=distribute.pc_search_result.none-task-blog-2~all~sobaiduweb~default-0-121298398.142^v51^control_1,201^v3^control_2&spm=1018.2226.3001.4187
所以我们只需要把上节中的代码修改为如下形式(这里做了不少简化):
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import label_binarize
# Load MNIST from OpenML. The target labels are compared against the
# string '3' below, i.e. they are digit strings rather than integers.
mnist = fetch_openml("mnist_784")

# MNIST pixel intensities are 0-255; scaling them to [0, 1] lets the default
# lbfgs solver converge instead of stopping early with a ConvergenceWarning.
X = mnist.data / 255.0

# Fixed random_state so the split (and the printed metrics) are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, mnist.target, test_size=0.5, random_state=42
)

# Turn the 10-class labels into a binary "is this digit a 3?" target.
y_train_3 = (y_train == '3')
y_test_3 = (y_test == '3')

# max_iter is raised from the default 100 to give the solver room to converge.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train_3)
y_pred = model.predict(X_test)

accuracy_score_value = accuracy_score(y_test_3, y_pred)
# Use the default average='binary' so recall/precision describe the positive
# class ("is 3"). average='macro' would report the unweighted mean over both
# classes, which hides how well the 3s themselves are detected.
recall_score_value = recall_score(y_test_3, y_pred)
precision_score_value = precision_score(y_test_3, y_pred)
# The report still lists per-class numbers for both False and True.
classification_report_value = classification_report(y_test_3, y_pred)

print("acc:", accuracy_score_value)
print("rec:", recall_score_value)
print("pre:", precision_score_value)
print(classification_report_value)
即可
接下来我们来绘制P-R图
我们需要用到专业的绘图库matplotlib
如果需要学习该绘图库可以参考这个教程
Matplotlib 教程 | 菜鸟教程 (runoob.com)
在这里我们不再赘述绘图库的用法,下面的代码是参考多方资料后整理得出的,
直接上代码:
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score,f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import label_binarize
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_auc_score,roc_curve
# Load MNIST from OpenML. The target labels are compared against the
# string '3' below, i.e. they are digit strings rather than integers.
mnist = fetch_openml("mnist_784")

# MNIST pixel intensities are 0-255; scaling them to [0, 1] lets the default
# lbfgs solver converge instead of stopping early with a ConvergenceWarning.
X = mnist.data / 255.0

# Fixed random_state so the split (and the printed metrics) are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, mnist.target, test_size=0.5, random_state=42
)

# Turn the 10-class labels into a binary "is this digit a 3?" target.
y_train_3 = (y_train == '3')
y_test_3 = (y_test == '3')

# max_iter is raised from the default 100 to give the solver room to converge.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train_3)
y_pred = model.predict(X_test)

accuracy_score_value = accuracy_score(y_test_3, y_pred)
# Use the default average='binary' so recall/precision describe the positive
# class ("is 3"). average='macro' would report the unweighted mean over both
# classes, which hides how well the 3s themselves are detected.
recall_score_value = recall_score(y_test_3, y_pred)
precision_score_value = precision_score(y_test_3, y_pred)
# The report still lists per-class numbers for both False and True.
classification_report_value = classification_report(y_test_3, y_pred)

print("acc:", accuracy_score_value)
print("rec:", recall_score_value)
print("pre:", precision_score_value)
print(classification_report_value)

# Following the reference material, use 3-fold cross-validation to get an
# out-of-fold decision score for every training sample. Each score comes from
# a model that did not see that sample during fitting.
from sklearn.model_selection import cross_val_predict
y_scores = cross_val_predict(
    model, X_train, y_train_3, cv=3, method="decision_function"
)

# Precision and recall at every distinct decision threshold.
precisions, recalls, thresholds = precision_recall_curve(y_train_3, y_scores)
def pr(precisions, recalls, thresholds):
    """Draw a precision-recall curve on the current matplotlib figure.

    Args:
        precisions: Precision values, plotted on the y-axis.
        recalls: Recall values, plotted on the x-axis.
        thresholds: Not used by the plot; kept so existing calls that pass
            all three outputs of precision_recall_curve keep working.
    """
    plt.plot(recalls, precisions, "b-", linewidth=2)
    plt.xlabel("Recall", fontsize=16)
    plt.ylabel("Precision", fontsize=16)
    # Both metrics are ratios, so pin the axes to the unit square.
    plt.axis([0, 1, 0, 1])
    plt.grid(True)


# Create the figure first so pr() draws onto it, then display it.
plt.figure(figsize=(8, 6))
pr(precisions, recalls, thresholds)
plt.show()
看结果
然后绘制roc
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score,f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import label_binarize
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_auc_score,roc_curve
# Load MNIST from OpenML. The target labels are compared against the
# string '3' below, i.e. they are digit strings rather than integers.
mnist = fetch_openml("mnist_784")

# MNIST pixel intensities are 0-255; scaling them to [0, 1] lets the default
# lbfgs solver converge instead of stopping early with a ConvergenceWarning.
X = mnist.data / 255.0

# Fixed random_state so the split (and the printed metrics) are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, mnist.target, test_size=0.5, random_state=42
)

# Turn the 10-class labels into a binary "is this digit a 3?" target.
y_train_3 = (y_train == '3')
y_test_3 = (y_test == '3')

# max_iter is raised from the default 100 to give the solver room to converge.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train_3)
y_pred = model.predict(X_test)

accuracy_score_value = accuracy_score(y_test_3, y_pred)
# Use the default average='binary' so recall/precision describe the positive
# class ("is 3"). average='macro' would report the unweighted mean over both
# classes, which hides how well the 3s themselves are detected.
recall_score_value = recall_score(y_test_3, y_pred)
precision_score_value = precision_score(y_test_3, y_pred)
# The report still lists per-class numbers for both False and True.
classification_report_value = classification_report(y_test_3, y_pred)

print("acc:", accuracy_score_value)
print("rec:", recall_score_value)
print("pre:", precision_score_value)
print(classification_report_value)

# Use 3-fold cross-validation to get an out-of-fold decision score for every
# training sample. Each score comes from a model that did not see that sample
# during fitting.
from sklearn.model_selection import cross_val_predict
y_scores = cross_val_predict(
    model, X_train, y_train_3, cv=3, method="decision_function"
)

# Precision and recall at every distinct decision threshold.
precisions, recalls, thresholds = precision_recall_curve(y_train_3, y_scores)
def pr(precisions, recalls, thresholds):
    """Draw a precision-recall curve on the current matplotlib figure.

    Args:
        precisions: Precision values, plotted on the y-axis.
        recalls: Recall values, plotted on the x-axis.
        thresholds: Not used by the plot; kept so existing calls that pass
            all three outputs of precision_recall_curve keep working.
    """
    plt.plot(recalls, precisions, "b-", linewidth=2)
    plt.xlabel("Recall", fontsize=16)
    plt.ylabel("Precision", fontsize=16)
    # Both metrics are ratios, so pin the axes to the unit square.
    plt.axis([0, 1, 0, 1])
    plt.grid(True)


# Create the figure first so pr() draws onto it, then display it.
plt.figure(figsize=(8, 6))
pr(precisions, recalls, thresholds)
plt.show()
# ROC curve: false/true positive rates at every decision threshold, computed
# from the same cross-validated scores used for the precision-recall curve.
fpr, tpr, thresholds = roc_curve(y_train_3, y_scores)


def roc(fpr, tpr, label=None):
    """Draw a ROC curve on the current matplotlib figure.

    Args:
        fpr: False positive rates, plotted on the x-axis.
        tpr: True positive rates, plotted on the y-axis.
        label: Optional legend label for the curve. When given, a legend is
            drawn so the label is actually visible.
    """
    plt.plot(fpr, tpr, linewidth=2, label=label)
    # Dashed diagonal: the curve of a purely random classifier.
    plt.plot([0, 1], [0, 1], 'k--')
    plt.axis([0, 1, 0, 1])
    plt.xlabel('False Positive Rate (Fall-Out)', fontsize=16)
    plt.ylabel('True Positive Rate (Recall)', fontsize=16)
    plt.grid(True)
    if label is not None:
        plt.legend(loc="lower right", fontsize=16)


# Create the figure first so roc() draws onto it, then display it.
plt.figure(figsize=(8, 6))
roc(fpr, tpr)
plt.show()
运行时,需要先关掉第一张图(P-R 曲线)的窗口,第二张图(ROC 曲线)才会出现,因为 plt.show() 默认会阻塞程序直到窗口被关闭。
scikit-learn 提供了计算 ROC AUC 的函数 roc_auc_score,我们直接使用:
# Area under the ROC curve, computed from the cross-validated decision scores.
auc = roc_auc_score(y_true=y_train_3, y_score=y_scores)
print("auc:", auc)
最后,为了方便大家,给出完整代码:
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score,f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import label_binarize
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_auc_score,roc_curve
# Load MNIST from OpenML. The target labels are compared against the
# string '3' below, i.e. they are digit strings rather than integers.
mnist = fetch_openml("mnist_784")

# MNIST pixel intensities are 0-255; scaling them to [0, 1] lets the default
# lbfgs solver converge instead of stopping early with a ConvergenceWarning.
X = mnist.data / 255.0

# Fixed random_state so the split (and the printed metrics) are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, mnist.target, test_size=0.5, random_state=42
)

# Turn the 10-class labels into a binary "is this digit a 3?" target.
y_train_3 = (y_train == '3')
y_test_3 = (y_test == '3')

# max_iter is raised from the default 100 to give the solver room to converge.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train_3)
y_pred = model.predict(X_test)

accuracy_score_value = accuracy_score(y_test_3, y_pred)
# Use the default average='binary' so recall/precision describe the positive
# class ("is 3"). average='macro' would report the unweighted mean over both
# classes, which hides how well the 3s themselves are detected.
recall_score_value = recall_score(y_test_3, y_pred)
precision_score_value = precision_score(y_test_3, y_pred)
# The report still lists per-class numbers for both False and True.
classification_report_value = classification_report(y_test_3, y_pred)

print("acc:", accuracy_score_value)
print("rec:", recall_score_value)
print("pre:", precision_score_value)
print(classification_report_value)

# Use 3-fold cross-validation to get an out-of-fold decision score for every
# training sample. Each score comes from a model that did not see that sample
# during fitting.
from sklearn.model_selection import cross_val_predict
y_scores = cross_val_predict(
    model, X_train, y_train_3, cv=3, method="decision_function"
)

# Precision and recall at every distinct decision threshold.
precisions, recalls, thresholds = precision_recall_curve(y_train_3, y_scores)
def pr(precisions, recalls, thresholds):
    """Draw a precision-recall curve on the current matplotlib figure.

    Args:
        precisions: Precision values, plotted on the y-axis.
        recalls: Recall values, plotted on the x-axis.
        thresholds: Not used by the plot; kept so existing calls that pass
            all three outputs of precision_recall_curve keep working.
    """
    plt.plot(recalls, precisions, "b-", linewidth=2)
    plt.xlabel("Recall", fontsize=16)
    plt.ylabel("Precision", fontsize=16)
    # Both metrics are ratios, so pin the axes to the unit square.
    plt.axis([0, 1, 0, 1])
    plt.grid(True)


# Create the figure first so pr() draws onto it, then display it.
plt.figure(figsize=(8, 6))
pr(precisions, recalls, thresholds)
plt.show()
# ROC curve: false/true positive rates at every decision threshold, computed
# from the same cross-validated scores used for the precision-recall curve.
fpr, tpr, thresholds = roc_curve(y_train_3, y_scores)


def roc(fpr, tpr, label=None):
    """Draw a ROC curve on the current matplotlib figure.

    Args:
        fpr: False positive rates, plotted on the x-axis.
        tpr: True positive rates, plotted on the y-axis.
        label: Optional legend label for the curve. When given, a legend is
            drawn so the label is actually visible.
    """
    plt.plot(fpr, tpr, linewidth=2, label=label)
    # Dashed diagonal: the curve of a purely random classifier.
    plt.plot([0, 1], [0, 1], 'k--')
    plt.axis([0, 1, 0, 1])
    plt.xlabel('False Positive Rate (Fall-Out)', fontsize=16)
    plt.ylabel('True Positive Rate (Recall)', fontsize=16)
    plt.grid(True)
    if label is not None:
        plt.legend(loc="lower right", fontsize=16)


# Create the figure first so roc() draws onto it, then display it.
plt.figure(figsize=(8, 6))
roc(fpr, tpr)
plt.show()
# Area under the ROC curve, computed from the cross-validated decision scores.
auc = roc_auc_score(y_true=y_train_3, y_score=y_scores)
print("auc:", auc)
auc结果