数据分析师养成之路之python篇:(画AUC和混淆矩阵)

画AUC和混淆矩阵
代码如下:

准备数据,构造模型:

from sklearn import datasets

import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn import linear_model
from sklearn.metrics import roc_curve,auc
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import itertools

# Load the breast-cancer dataset shipped with scikit-learn and split it into
# the feature matrix and the target vector.
breast_cancer = datasets.load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target

# Classifier and 5-fold stratified splitter used by the evaluation code below.
lmr = linear_model.LogisticRegression()
cv = StratifiedKFold(n_splits=5)

开始画图:ROC曲线(含AUC)和混淆矩阵(计数型、比例型)

def paintRoc(y_true,y_preb):
    """Plot a ROC curve with its AUC and display the figure.

    Args:
        y_true: True labels, forwarded unchanged to ``roc_curve``.
        y_preb: 2-D score array; only column 1 is used as the score
            (presumably the positive-class column of ``predict_proba`` --
            confirm against the caller).
    """
    positive_scores = y_preb[:, 1]
    # The threshold array returned by roc_curve is not needed for plotting.
    fpr, tpr, _ = roc_curve(y_true, positive_scores)
    roc_auc = auc(fpr, tpr)

    # ROC curve itself.
    plt.plot(fpr, tpr, color='r', lw=5, alpha=0.8,
             label='Roc(AUC=%0.2f)'%(roc_auc))
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], color='r', lw=2, linestyle='--', alpha=.8,
             label='Luck')

    plt.title('ROC_auc(AUC=%0.2f)'%(roc_auc))
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.legend(loc="lower right")
    plt.show()

def paintConfusion_digit(lmr_matrix,classes):
    """Draw a confusion matrix as a heat map annotated with integer values.

    Args:
        lmr_matrix: 2-D integer array (each cell is formatted with ``'d'``).
        classes: Class labels used as tick labels on both axes; only its
            length and its elements are used.
    """
    plt.imshow(lmr_matrix, cmap=plt.cm.Blues, interpolation='nearest')
    plt.title('confusion matrix')
    plt.colorbar()

    tick_positions = np.arange(len(classes))
    plt.xticks(tick_positions, classes, rotation=45)
    plt.yticks(tick_positions, classes)
    plt.xlabel('Pre label')
    plt.ylabel('True label')

    # Cells above half of the largest value get black text, the rest red.
    half_max = lmr_matrix.max() / 2.
    for row in range(lmr_matrix.shape[0]):
        for col in range(lmr_matrix.shape[1]):
            cell = lmr_matrix[row, col]
            text_color = "black" if cell > half_max else "red"
            # Image x is the column index and y is the row index.
            plt.text(col, row, format(cell, 'd'),
                     horizontalalignment="center",
                     color=text_color)

    plt.tight_layout()
    plt.show()

def paintConfusion_float(lmr_matrix,classes):
    """Draw a row-normalised confusion matrix as an annotated heat map.

    Each row of ``lmr_matrix`` is divided by its own sum, so every cell shows
    the fraction of that row's samples that fell into the column. The
    normalisation is done *before* drawing, so the colours and the colour bar
    describe the same ratios that are printed in the cells.

    Args:
        lmr_matrix: 2-D array of counts (rows are summed along axis 1).
        classes: Class labels used as tick labels on both axes; only its
            length and its elements are used.
    """
    counts = np.asarray(lmr_matrix, dtype=float)
    row_totals = counts.sum(axis=1)[:, np.newaxis]
    # A row with no samples would divide by zero and print "nan"; leave such
    # rows at 0 instead.
    ratios = np.divide(counts, row_totals,
                       out=np.zeros_like(counts),
                       where=row_totals != 0)

    plt.imshow(ratios,interpolation='nearest',cmap=plt.cm.Blues)
    plt.title('confusion matrix')
    plt.colorbar()
    tick_marks=np.arange(len(classes))
    plt.xticks(tick_marks,classes,rotation=45)
    plt.yticks(tick_marks,classes)
    plt.xlabel('Pre label')
    plt.ylabel('True label')

    fmt='.2f'
    # Cells above half of the largest ratio get black text, the rest red.
    thresh=ratios.max()/2.
    for i,j in itertools.product(range(ratios.shape[0]),range(ratios.shape[1])):
        # Image x is the column index (j) and y is the row index (i).
        plt.text(j, i, format(ratios[i, j], fmt),
                     horizontalalignment="center",
                     color="black" if ratios[i, j] > thresh else "red")
    plt.tight_layout()
    plt.show()

下面是调用上面函数

# For every stratified fold: fit on the training part, then draw the ROC curve
# and the row-normalised confusion matrix for the held-out part.
for train,test in cv.split(X,y):
    # Fit once per fold and reuse the fitted model for both the probabilities
    # and the hard predictions.
    lmr.fit(X[train],y[train])
    probas_=lmr.predict_proba(X[test])
    y_pre=lmr.predict(X[test])
    paintRoc(y[test],probas_)
    print('\n')
    # The original passed set(y_test), an undefined name. Use the classes the
    # model was fitted on, and hand the same ordered labels to
    # confusion_matrix so the tick labels always match the rows and columns.
    fold_classes=lmr.classes_
    lmr_matrix=confusion_matrix(y[test],y_pre,labels=fold_classes)
    paintConfusion_float(lmr_matrix,fold_classes)
    print('\n')

新增:不同阈值下的混淆矩阵绘制:
上代码:
这里阈值取0.1—0.9(通常我们用predict_proba得到正类概率后,>0.5判为1,否则判为0,即默认阈值为0.5;但是对于不同的模型、不同的数据特征,阈值需要我们自行调整)

def DrawConfusion(newtestdata,clf2,y_test):
    """Draw a 3x3 grid of confusion matrices for thresholds 0.1 ... 0.9.

    For each threshold, a sample is labelled 1 when column 1 of
    ``clf2.predict_proba(newtestdata)`` is strictly greater than the
    threshold and 0 otherwise. The resulting confusion matrix is drawn in its
    own subplot, titled with the threshold and the accuracy in percent.

    The figure is created but not shown or returned; display or save it from
    the caller (for example with ``plt.show()``).

    Args:
        newtestdata: Feature matrix passed to ``clf2.predict_proba``.
        clf2: Fitted classifier exposing ``predict_proba``.
        y_test: True labels; assumed to be encoded as 0/1 because the
            thresholded predictions are 0/1 -- confirm against the caller.
    """
    # Local import: the file's import block does not provide `metrics`, which
    # the original referenced and which therefore raised NameError.
    from sklearn.metrics import accuracy_score

    fig,ax = plt.subplots(nrows=3,ncols=3,figsize=(20,20),facecolor='white')
    preb=clf2.predict_proba(newtestdata)
    positive_scores=preb[:,1]
    for cnt,theld in enumerate([0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]):
        # Build a fresh 0/1 prediction per threshold instead of overwriting
        # the array returned by predict() in place.
        y_pred=(positive_scores>theld).astype(int)
        acc=accuracy_score(y_test,y_pred)*100
        lmr_matrix=confusion_matrix(y_test,y_pred)

        cell_ax=ax[cnt//3][cnt%3]
        fmt='d'
        # Same text-colour rule as the other confusion-matrix plots: cells
        # above half of the largest count get black text, the rest red.
        thresh=lmr_matrix.max()/2.
        cell_ax.matshow(lmr_matrix,interpolation='hermite',cmap=plt.cm.Blues)
        for row in range(lmr_matrix.shape[0]):
            for col in range(lmr_matrix.shape[1]):
                # matshow draws matrix[row, col] at x=col, y=row, so the text
                # must use the same mapping to sit on its own cell.
                cell_ax.text(col,row,format(lmr_matrix[row, col], fmt),
                             horizontalalignment='center',verticalalignment='center',
                             color='black' if lmr_matrix[row, col]>thresh else 'red',fontsize=20)
        # confusion_matrix puts true labels on rows (y axis) and predicted
        # labels on columns (x axis).
        cell_ax.set_xlabel('Predicted label')  # axis label
        cell_ax.set_ylabel('True label')  # axis label
        cell_ax.set_title('theld={},acc={}'.format(theld,acc))

这里 interpolation 的可选值有:[None, 'none', 'nearest', 'bilinear', 'bicubic', 'spline16',
'spline36', 'hanning', 'hamming', 'hermite', 'kaiser', 'quadric',
'catrom', 'gaussian', 'bessel', 'mitchell', 'sinc', 'lanczos']

你可能感兴趣的:(机器学习之Python语言基础)