不同算法下校准曲线对比

from sklearn.datasets import make_classification
from sklearn.calibration import calibration_curve  # 校准曲线
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import time
import matplotlib.pyplot as plt


# Build a synthetic binary-classification dataset: 10000 samples with 20
# features, 10 of which are informative. The seed is fixed for reproducibility.
x, y = make_classification(
    n_samples=10000,
    n_features=20,
    n_informative=10,  # number of features that carry useful information
    n_classes=2,
    random_state=42,
)

# Hold out 20% of the samples as the evaluation set.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Fit each classifier once and report its fit time and test-set accuracy.
for name, model in {
    '随机森林': RandomForestClassifier(),
    '高斯贝叶斯': GaussianNB(),
    '逻辑回归': LogisticRegression(C=0.1),
    '支持向量机': SVC(),
}.items():
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval is not distorted by system clock adjustments the way
    # time.time() can be.
    start = time.perf_counter()
    model.fit(xtrain, ytrain)
    end = time.perf_counter()
    print(f'{name}用时{end-start};正确率{accuracy_score(ytest,model.predict(xtest))}')



# Grid of calibration curves: one row per redundant-feature setting,
# one column per classifier.
# NOTE(review): the Chinese title/legend text presumably needs a CJK-capable
# font configured in matplotlib to render correctly — confirm.
fig, axs = plt.subplots(2, 4)
fig.suptitle('不同算法下的校准曲线')
for row, n_redundant in enumerate([5, 15]):
    x, y = make_classification(
        n_samples=10000,
        n_features=20,
        n_redundant=n_redundant,
        n_classes=2,
        random_state=42,
    )
    # Split the data generated for THIS setting. Without this the models are
    # trained and evaluated on a split made from an earlier dataset, so every
    # row would plot the same curves regardless of n_redundant.
    xtrain, xtest, ytrain, ytest = train_test_split(
        x, y, test_size=0.2, random_state=42
    )

    classifiers = {
        '随机森林': RandomForestClassifier(),
        '高斯贝叶斯': GaussianNB(),
        '逻辑回归': LogisticRegression(C=0.1),
        # probability=True is required for SVC to expose predict_proba.
        '支持向量机': SVC(probability=True),
    }
    for col, (name, model) in enumerate(classifiers.items()):
        model.fit(xtrain, ytrain)
        # Use the predicted probability of the positive class (column 1).
        prob_true, prob_pred = calibration_curve(
            ytest, model.predict_proba(xtest)[:, 1], n_bins=10
        )
        ax = axs[row, col]
        ax.plot([0, 1], [0, 1])  # diagonal reference line for comparison
        ax.plot(prob_pred, prob_true, linestyle='dashdot',
                label=f'{name}(冗余特征{n_redundant})')
        ax.legend(loc='upper center')

plt.show()

#结果如下:

(图1:不同算法下校准曲线对比)

你可能感兴趣的:(机器学习,算法,人工智能)