画ROC曲线(含AUC)和混淆矩阵
代码如下:
准备数据,构造模型:
from sklearn import datasets
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn import linear_model
from sklearn.metrics import roc_curve,auc
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import itertools
# Load the scikit-learn breast-cancer dataset and pull out the feature
# matrix and the target vector.
breast_cancer = datasets.load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target

# Logistic-regression model and the 5-fold stratified splitter that the
# evaluation loop further down iterates over.
lmr = linear_model.LogisticRegression()
cv = StratifiedKFold(n_splits=5)
开始画图,AUC和混淆矩阵(数字型,分数型)
def paintRoc(y_true, y_preb):
    """Plot a ROC curve with its AUC and display the figure.

    Args:
        y_true: True labels, handed to ``roc_curve`` unchanged.
        y_preb: 2-D array of scores; only column 1 is used as the score
            for ``roc_curve`` (the caller in this file passes the output
            of ``predict_proba``).
    """
    positive_scores = y_preb[:, 1]
    fpr, tpr, _ = roc_curve(y_true, positive_scores)
    roc_auc = auc(fpr, tpr)

    # The ROC curve itself; the AUC goes into the legend entry.
    curve_label = 'Roc(AUC=%0.2f)' % (roc_auc)
    plt.plot(fpr, tpr, color='r', alpha=0.8, lw=5, label=curve_label)

    # Dashed diagonal from (0, 0) to (1, 1) as a reference line.
    plt.plot([0, 1], [0, 1], color='r', alpha=.8, lw=2,
             linestyle='--', label='Luck')

    plt.title('ROC_auc(AUC=%0.2f)' % (roc_auc))
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.legend(loc="lower right")
    plt.show()
def paintConfusion_digit(lmr_matrix, classes):
    """Draw a confusion matrix as a heat map annotated with integer counts.

    Args:
        lmr_matrix: 2-D array of integer counts. Rows are drawn along the
            y axis ('True label'), columns along the x axis ('Pre label').
        classes: Tick labels, one per row/column.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    plt.imshow(lmr_matrix, cmap=plt.cm.Blues, interpolation='nearest')
    plt.title('confusion matrix')
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)
    plt.xlabel('Pre label')
    plt.ylabel('True label')

    # Cells above half of the largest count are annotated in black,
    # all others in red.
    half_max = lmr_matrix.max() / 2.
    for row in range(lmr_matrix.shape[0]):
        for col in range(lmr_matrix.shape[1]):
            value = lmr_matrix[row, col]
            if value > half_max:
                text_color = "black"
            else:
                text_color = "red"
            # plt.text takes (x, y), i.e. (column, row).
            plt.text(col, row, format(value, 'd'),
                     horizontalalignment="center",
                     color=text_color)

    plt.tight_layout()
    plt.show()
def paintConfusion_float(lmr_matrix, classes):
    """Draw a row-normalised confusion matrix as an annotated heat map.

    Each row of ``lmr_matrix`` is divided by its row sum, so every cell
    holds the fraction of that row's samples that fell into the cell. The
    normalised values drive the colours, the colour bar and the ``.2f``
    text annotations alike. The figure is displayed with ``plt.show()``;
    nothing is returned.

    Args:
        lmr_matrix: 2-D array of counts. Rows are drawn along the y axis
            ('True label'), columns along the x axis ('Pre label').
        classes: Tick labels, one per row/column. A set is accepted and is
            sorted first so the tick order is deterministic.
    """
    if isinstance(classes, (set, frozenset)):
        classes = sorted(classes)

    # Normalise BEFORE drawing so the heat map and colour bar show the same
    # ratios as the annotations. Rows whose sum is zero stay at 0 instead
    # of turning into NaN through a division by zero.
    counts = np.asarray(lmr_matrix, dtype=float)
    row_totals = counts.sum(axis=1, keepdims=True)
    ratios = np.divide(counts, row_totals,
                       out=np.zeros_like(counts),
                       where=row_totals != 0)

    plt.imshow(ratios, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('confusion matrix')
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    plt.xlabel('Pre label')
    plt.ylabel('True label')

    fmt = '.2f'
    # Cells above half of the largest ratio are annotated in black,
    # all others in red.
    thresh = ratios.max() / 2.
    for i, j in itertools.product(range(ratios.shape[0]),
                                  range(ratios.shape[1])):
        # plt.text takes (x, y), i.e. (column, row).
        plt.text(j, i, format(ratios[i, j], fmt),
                 horizontalalignment="center",
                 color="black" if ratios[i, j] > thresh else "red")
    plt.tight_layout()
    plt.show()
下面是调用上面函数
# Evaluate the model fold by fold: for every stratified split, draw the ROC
# curve and the row-normalised confusion matrix of the held-out part.
for train, test in cv.split(X, y):
    # Fit once per fold and reuse the fitted model for both the class
    # probabilities and the hard predictions.
    lmr.fit(X[train], y[train])
    probas_ = lmr.predict_proba(X[test])
    y_pre = lmr.predict(X[test])

    paintRoc(y[test], probas_)
    print('\n')

    # Use the model's class labels for both the matrix layout and the tick
    # labels so that rows/columns and ticks are guaranteed to line up.
    fold_classes = lmr.classes_
    lmr_matrix = confusion_matrix(y[test], y_pre, labels=fold_classes)
    paintConfusion_float(lmr_matrix, fold_classes)
    print('\n')
新增:不同阈值下的混淆矩阵绘制:
上代码:
这里阈值取0.1~0.9(通常我们用predict_proba得到正类的概率,>0.5判为1,否则判为0,即默认阈值为0.5;但是对于不同的模型、不同的数据特征,阈值需要我们自行调整)
def DrawConfusion(newtestdata, clf2, y_test):
    """Draw confusion matrices for nine decision thresholds on a 3x3 grid.

    For each threshold from 0.1 to 0.9, samples whose column-1 probability
    from ``clf2.predict_proba`` exceeds the threshold are labelled 1 and
    all others 0. The resulting confusion matrix is drawn in its own
    subplot, titled with the threshold and the accuracy in percent.

    The function assumes a binary problem with labels 0 and 1. It does not
    call ``plt.show()`` and returns nothing.

    Args:
        newtestdata: Feature matrix passed to ``clf2.predict_proba``.
        clf2: Fitted classifier exposing ``predict_proba``.
        y_test: True labels for ``newtestdata``.
    """
    # Imported locally: the file imports only selected names from
    # sklearn.metrics, so a bare ``metrics.accuracy_score`` is undefined.
    from sklearn.metrics import accuracy_score

    fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(20, 20),
                           facecolor='white')
    preb = clf2.predict_proba(newtestdata)

    fmt = 'd'
    # Text-colour rule: any non-zero count is black, zero counts are red.
    color_cutoff = 0.5
    thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

    # ax.flat walks the 3x3 grid row by row, one subplot per threshold.
    for panel, theld in zip(ax.flat, thresholds):
        # Build a fresh prediction vector per threshold instead of
        # overwriting a shared array in place.
        y_pred = (preb[:, 1] > theld).astype(int)
        acc = accuracy_score(y_test, y_pred) * 100
        # Pin the label order so the matrix is always 2x2, even when a
        # threshold pushes every prediction into a single class.
        lmr_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

        panel.matshow(lmr_matrix, interpolation='hermite',
                      cmap=plt.cm.Blues)
        for row in range(lmr_matrix.shape[0]):
            for col in range(lmr_matrix.shape[1]):
                # text() takes (x, y) = (column, row); passing (row, col)
                # would show the numbers transposed relative to the image.
                panel.text(col, row, format(lmr_matrix[row, col], fmt),
                           horizontalalignment='center',
                           verticalalignment='center',
                           color=('black'
                                  if lmr_matrix[row, col] > color_cutoff
                                  else 'red'),
                           fontsize=20)
        # Rows of the matrix are true labels (y axis), columns are
        # predicted labels (x axis).
        panel.set_xlabel('Predicted label')
        panel.set_ylabel('True label')
        panel.set_title('theld={},acc={}'.format(theld, acc))
这里 interpolation 的取值可以是 [None, 'none', 'nearest', 'bilinear', 'bicubic', 'spline16',
'spline36', 'hanning', 'hamming', 'hermite', 'kaiser', 'quadric',
'catrom', 'gaussian', 'bessel', 'mitchell', 'sinc', 'lanczos']