Computing accuracy and AUC with logistic regression, a decision tree, and SVM

  • Imported packages
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.preprocessing import label_binarize
  • Read the data
# Read the dataset
data_all = pd.read_csv('/home/infisa/wjht/project/DataWhale/data_all.csv', encoding='gbk')
  • Split the dataset
# Split into training and test sets
features = [x for x in data_all.columns if x not in ['status']]
X = data_all[features]
y = data_all['status']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=2018)
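
LogisticRegression and SVC are sensitive to feature scale, so a scaling step right after the split usually helps; a minimal sketch with StandardScaler (X_train_scaled / X_test_scaled are illustrative names and are not used by the original code below):
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  # fit the scaler on the training set only
X_test_scaled = scaler.transform(X_test)        # reuse the training statistics on the test set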
  • Build the models
lr = LogisticRegression()  # logistic regression model
lr.fit(X_train, y_train)

tr = DecisionTreeClassifier()  # decision tree model
tr.fit(X_train, y_train)

svm = SVC()  # SVM model
svm.fit(X_train, y_train)
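
As an optional sanity check before touching the test set, the three models can also be compared with 5-fold cross-validation on the training data; a minimal sketch using cross_val_score (not part of the original workflow):
from sklearn.model_selection import cross_val_score

for name, model in [('lr', LogisticRegression()),
                    ('tree', DecisionTreeClassifier()),
                    ('svm', SVC())]:
    cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    print(name, cv_scores.mean())  # mean accuracy over the 5 folds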

  • Model scoring

# Model scoring (accuracy on the test set)
lr_score = lr.score(X_test, y_test)
print(lr_score)
'lr_score:0.7484232655921513'

tr_score = tr.score(X_test, y_test)
print(tr_score)
'tr_score:0.6797477224947442'

svm_score = svm.score(X_test, y_test)
print(svm_score)
'svm_score:0.7484232655921513'
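
For classifiers, score() is plain accuracy; the same number can be reproduced explicitly with metrics.accuracy_score, e.g. for the fitted logistic regression (a sketch, assuming the lr model above):
lr_pred = lr.predict(X_test)                    # predicted labels on the test set
print(metrics.accuracy_score(y_test, lr_pred))  # should match lr.score(X_test, y_test)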
  • Compute AUC
# Compute AUC values

y_test_hot = label_binarize(y_test, classes=(0, 1))  # binarize the test-set labels into a 0/1 matrix

lr_y_score = lr.decision_function(X_test)  # signed decision scores of the logistic regression on the test set

svm_y_score = svm.decision_function(X_test)  # signed decision scores of the SVM on the test set

lr_fpr, lr_tpr, lr_thresholds = metrics.roc_curve(y_test_hot.ravel(), lr_y_score.ravel())  # ROC curve; lr_thresholds are the score thresholds

svm_fpr, svm_tpr, svm_thresholds = metrics.roc_curve(y_test_hot.ravel(), svm_y_score.ravel())  # ROC curve; svm_thresholds are the score thresholds

lr_auc = metrics.auc(lr_fpr, lr_tpr)
print(lr_auc)
'lr_auc:0.5674626772245001'

svm_auc = metrics.auc(svm_fpr, svm_tpr)
print(svm_auc)
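
DecisionTreeClassifier has no decision_function, which is why the tree is missing from the AUC step above; a sketch of its AUC would use the predicted probability of the positive class together with roc_auc_score (a shortcut for roc_curve plus auc), assuming the fitted tr model:
tr_y_score = tr.predict_proba(X_test)[:, 1]         # probability of class 1 on the test set
tr_auc = metrics.roc_auc_score(y_test, tr_y_score)  # AUC directly from labels and scores
print(tr_auc)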
