from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score,f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import label_binarize
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve,PrecisionRecallDisplay,RocCurveDisplay
from sklearn.metrics import roc_auc_score,roc_curve
import numpy as np
# Binary "is this digit a 3?" classifier on MNIST using logistic regression,
# followed by precision-recall and ROC curves on the held-out rows.

# Download the dataset from OpenML and convert features/labels to arrays.
mnist = fetch_openml("mnist_784")
X = np.array(mnist['data'])
y = np.array(mnist['target'])

# Positional split: the first 60000 rows train the model, the rest test it.
X_train = X[:60000]
X_test = X[60000:]
y_train = y[:60000]
y_test = y[60000:]
print(X_train.shape, X_test.shape)
print(y[:10])

# Boolean targets: True where the label equals the string '3'.
# (The comparison against '3' presumes the labels are strings, not ints.)
y_train_3 = (y_train == '3')
y_test_3 = (y_test == '3')

# NOTE(review): the default solver settings may not converge on unscaled
# pixel values -- consider scaling the features or raising max_iter.
model = LogisticRegression()
model.fit(X_train, y_train_3)
y_pred = model.predict(X_test)

# Hard-label metrics; recall and precision are macro-averaged over the
# two classes (True / False).
accuracy_score_value = accuracy_score(y_test_3, y_pred)
recall_score_value = recall_score(y_test_3, y_pred, average='macro')
precision_score_value = precision_score(y_test_3, y_pred, average='macro')
classification_report_value = classification_report(y_test_3, y_pred)
print("acc:", accuracy_score_value)
print("rec:", recall_score_value)
print("pre:", precision_score_value)
print(classification_report_value)

# Column 1 of predict_proba is the score of the positive (True) class; the
# curve-based metrics below need scores rather than hard labels.
pred_proba = model.predict_proba(X_test)[:, 1]

# Precision-recall curve.
precisions, recalls, thresholds = precision_recall_curve(y_test_3, pred_proba)
disp = PrecisionRecallDisplay(precision=precisions, recall=recalls)
disp.plot()
plt.title('P-R Curve')
plt.show()

# ROC curve and the area under it.
auc = roc_auc_score(y_test_3, pred_proba)
fpr, tpr, _ = roc_curve(y_test_3, pred_proba)
disp_roc = RocCurveDisplay(fpr=fpr, tpr=tpr)
disp_roc.plot()
plt.title('Roc')
print("auc:", auc)
# The ROC figure was previously built but never displayed.
plt.show()
有以下几点修改:
1.发现了sklearn中有一个直接用于pr图输出的函数PrecisionRecallDisplay(precision=precisions,recall=recalls)
2.然后我们修改了数据集的训练集和预测集的划分
3.去掉了借鉴来的装模作样的交叉矩阵(本来这是用来添加噪声的,但是我们不需要噪声,所以只是装模作样)
结果就不放了,自己尝试(不会有人不会复制黏贴吧?)
1.我们直接照抄参考代码进行尝试
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score,f1_score,precision_score,accuracy_score,f1_score
from sklearn.metrics import precision_recall_curve,PrecisionRecallDisplay,RocCurveDisplay
from sklearn.metrics import roc_auc_score,roc_curve
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
import os
import pickle
# Generate the two-moons dataset once and cache it on disk, so that later
# runs reuse exactly the same points instead of drawing new random ones.
# The cache name was previously './data.pk|'; '|' is a typo for 'l' and is
# not a legal filename character on Windows.
data_path = './data.pkl'

if not os.path.exists(data_path):
    X, y = make_moons(n_samples=1000, noise=0.1)
    with open(data_path, 'wb') as f:
        pickle.dump([X, y], f)
else:
    # The pickle is a local cache written by this script; do not point
    # data_path at a file from an untrusted source.
    with open(data_path, 'rb') as f:
        X, y = pickle.load(f)

# Scatter the samples coloured by class label, save the figure, then show it.
plt.scatter(X[:, 0], X[:, 1], c=y, s=10, cmap='rainbow')
plt.savefig('data.png', dpi=600)
plt.show()
非常顺利,结果:
2.这里按照max_depth参数的取值列表(a)批量输出模型的评估指标
需要注意的是这里输出图片不能按照最上面数据处理的方式进行
plt.scatter(X[:,0],X[:,1],c=y,s=10,cmap='rainbow')
这里c=y会报错
按照以下方式进行就可以
这边参考了一篇文章的方法进行修改
参考文章:月亮数据预测(决策树和随机森林算法)_汪程序猿的博客-CSDN博客_对月亮数据训练并微调一个决策树
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score,f1_score,precision_score,accuracy_score,f1_score
from sklearn.metrics import precision_recall_curve,PrecisionRecallDisplay,RocCurveDisplay
from sklearn.metrics import roc_auc_score,roc_curve
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
import os
import pickle
# Generate the two-moons dataset once and cache it on disk, so that later
# runs reuse exactly the same points instead of drawing new random ones.
# The cache name was previously './data.pk|'; '|' is a typo for 'l' and is
# not a legal filename character on Windows.
data_path = './data.pkl'

if not os.path.exists(data_path):
    X, y = make_moons(n_samples=1000, noise=0.1)
    with open(data_path, 'wb') as f:
        pickle.dump([X, y], f)
else:
    # The pickle is a local cache written by this script; do not point
    # data_path at a file from an untrusted source.
    with open(data_path, 'rb') as f:
        X, y = pickle.load(f)

# Scatter the samples coloured by class label, save the figure, then show it.
plt.scatter(X[:, 0], X[:, 1], c=y, s=10, cmap='rainbow')
plt.savefig('data.png', dpi=600)
plt.show()

# Candidate tree depths; None leaves the depth unrestricted.
a = [1, 2, 3, 5, 10, None]

for d in a:
    # A fresh 50/50 split is drawn for every depth.
    # NOTE(review): without a fixed random_state the scores of different
    # depths come from different splits -- confirm this is intended.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    model = DecisionTreeClassifier(criterion="entropy", max_depth=d)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Hard-label metrics, macro-averaged over the two classes.
    accuracy_score_value = accuracy_score(y_test, y_pred)
    recall_score_value = recall_score(y_test, y_pred, average='macro')
    precision_score_value = precision_score(y_test, y_pred, average='macro')
    f1_score_value = f1_score(y_test, y_pred, average='macro')

    # AUC is computed from the predicted score of class 1, not hard labels.
    pred_proba = model.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, pred_proba)

    print("max_depth:", d)
    print("acc:", accuracy_score_value)
    print("rec:", recall_score_value)
    print("pre:", precision_score_value)
    # F1 was previously computed but never reported.
    print("f1:", f1_score_value)
    print("auc:", auc)

    # Plot the training points one class at a time: class 0 in blue,
    # class 1 in red.
    plt.scatter(X_train[:, 0][y_train == 0], X_train[:, 1][y_train == 0], c='b', marker='o', s=10)
    plt.scatter(X_train[:, 0][y_train == 1], X_train[:, 1][y_train == 1], c='r', marker='o', s=10)
    plt.show()
3.直接上代码吧,花了几个小时
这里还要下载点东西
conda install python-graphviz
apt-get install graphviz
apt-get install graphviz graphviz-doc
最后代码
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score,f1_score,precision_score,accuracy_score,f1_score
from sklearn.metrics import precision_recall_curve,PrecisionRecallDisplay,RocCurveDisplay
from sklearn.metrics import roc_auc_score,roc_curve
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import export_graphviz
import os
import pickle
import graphviz
# Generate the two-moons dataset once and cache it on disk, so that later
# runs reuse exactly the same points instead of drawing new random ones.
# The cache name was previously './data.pk|'; '|' is a typo for 'l' and is
# not a legal filename character on Windows.
data_path = './data.pkl'

if not os.path.exists(data_path):
    X, y = make_moons(n_samples=1000, noise=0.1)
    with open(data_path, 'wb') as f:
        pickle.dump([X, y], f)
else:
    # The pickle is a local cache written by this script; do not point
    # data_path at a file from an untrusted source.
    with open(data_path, 'rb') as f:
        X, y = pickle.load(f)

# Scatter the samples coloured by class label, save the figure, then show it.
plt.scatter(X[:, 0], X[:, 1], c=y, s=10, cmap='rainbow')
plt.savefig('data.png', dpi=600)
plt.show()

# Candidate tree depths; None leaves the depth unrestricted.
a = [1, 2, 3, 5, 10, None]

for d in a:
    # A fresh 50/50 split is drawn for every depth.
    # NOTE(review): without a fixed random_state the scores of different
    # depths come from different splits -- confirm this is intended.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    model = DecisionTreeClassifier(criterion="entropy", max_depth=d)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Hard-label metrics, macro-averaged over the two classes.
    accuracy_score_value = accuracy_score(y_test, y_pred)
    recall_score_value = recall_score(y_test, y_pred, average='macro')
    precision_score_value = precision_score(y_test, y_pred, average='macro')
    f1_score_value = f1_score(y_test, y_pred, average='macro')

    # AUC is computed from the predicted score of class 1, not hard labels.
    pred_proba = model.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, pred_proba)

    print("max_depth:", d)
    print("acc:", accuracy_score_value)
    print("rec:", recall_score_value)
    print("pre:", precision_score_value)
    # F1 was previously computed but never reported.
    print("f1:", f1_score_value)
    print("auc:", auc)

    # Plot the training points one class at a time: class 0 in blue,
    # class 1 in red.
    plt.scatter(X_train[:, 0][y_train == 0], X_train[:, 1][y_train == 0], c='b', marker='o', s=10)
    plt.scatter(X_train[:, 0][y_train == 1], X_train[:, 1][y_train == 1], c='r', marker='o', s=10)
    plt.show()

    # Export the fitted tree as DOT text (out_file=None returns a string)
    # and render it to a file whose name ends with the current depth.
    dot_data = export_graphviz(model, max_depth=d, out_file=None, class_names=['0', '1'])
    graph = graphviz.Source(dot_data)
    graph.render('决策树可视化' + str(d))
运行结果会有7张图分别对应初始数据处理,max_depth=1,2,3,5,10,None时的散点图
还有决策树可视化的图会自动输出到你的代码项目所在的文件夹,共6张,自动转换为pdf文件
里面具体的我就不放了自己看吧