python数据挖掘test5

内容:根据信用卡持卡人背景信息(年龄、教育水平、当前工作年限、当前居住年限、家庭收入、债务占收入比例、信用卡负债、其他负债)预测还款拖欠情况,并用分类算法建模预测。


数据导入:

import pandas as pd

# Training set: every column except the last is used as a feature, and the
# last column is used as the label.
data = pd.read_csv('C:\\Users\\dell\\Desktop\\datamining\\train__UnB.csv')
train_data, train_label = data.iloc[:, :-1], data.iloc[:, -1]

# Test set: split into features and label the same way as the training set.
data1 = pd.read_csv('C:\\Users\\dell\\Desktop\\datamining\\test__UnB.csv')
test_data, test_label = data1.iloc[:, :-1], data1.iloc[:, -1]

数据源:


python数据挖掘test5_第1张图片

机器学习分类预测

# Distinct values present in the training labels (shows how many classes exist).
train_label.unique()

从标签数值看出,这是一个二分类问题。

  • KNN算法
from sklearn.model_selection import cross_val_score
from sklearn import neighbors

# K-nearest-neighbours classifier that votes among the 2 closest samples.
knnModel = neighbors.KNeighborsClassifier(n_neighbors=2)
knnModel.fit(train_data, train_label)

# Mean score over 5-fold cross-validation on the training set.
# cross_val_score returns a NumPy array, so its own .mean() is used and no
# separate numpy import is required.
score = cross_val_score(knnModel, train_data, train_label, cv=5).mean()

result = knnModel.predict(test_data)

# Compute accuracy, precision, recall and F1 on the test set, treating
# label 1 as the positive class.
TP = FP = FN = TN = 0
# Pair labels with predictions by position so the count does not depend on
# the index labels carried by test_label.
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

total = len(test_label)
# Each ratio falls back to 0.0 when its denominator is zero (e.g. the model
# never predicts the positive class) instead of raising ZeroDivisionError.
accuracy = (TP + TN) / total if total else 0.0
precision = TP / (TP + FP) if (TP + FP) else 0.0
recall = TP / (TP + FN) if (TP + FN) else 0.0
F1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' % (score, accuracy, precision, recall, F1))
python数据挖掘test5_第2张图片
  • 朴素贝叶斯
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import MultinomialNB

# Multinomial naive Bayes classifier with default settings.
MNBModel = MultinomialNB()
MNBModel.fit(train_data, train_label)

# Mean score over 5-fold cross-validation on the training set.
# cross_val_score returns a NumPy array, so its own .mean() is used and no
# separate numpy import is required.
score = cross_val_score(MNBModel, train_data, train_label, cv=5).mean()

result = MNBModel.predict(test_data)

# Compute accuracy, precision, recall and F1 on the test set, treating
# label 1 as the positive class.
TP = FP = FN = TN = 0
# Pair labels with predictions by position so the count does not depend on
# the index labels carried by test_label.
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

total = len(test_label)
# Each ratio falls back to 0.0 when its denominator is zero (e.g. the model
# never predicts the positive class) instead of raising ZeroDivisionError.
accuracy = (TP + TN) / total if total else 0.0
precision = TP / (TP + FP) if (TP + FP) else 0.0
recall = TP / (TP + FN) if (TP + FN) else 0.0
F1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' % (score, accuracy, precision, recall, F1))
python数据挖掘test5_第3张图片
  • SVM
from sklearn.model_selection import cross_val_score
from sklearn import svm

# Support vector classifier with an RBF kernel.
svcModel = svm.SVC(kernel='rbf')
svcModel.fit(train_data, train_label)

# Mean score over 5-fold cross-validation on the training set.
# cross_val_score returns a NumPy array, so its own .mean() is used and no
# separate numpy import is required.
score = cross_val_score(svcModel, train_data, train_label, cv=5).mean()

result = svcModel.predict(test_data)

# Compute accuracy, precision, recall and F1 on the test set, treating
# label 1 as the positive class.
# Every counter starts at 0 so the metrics reflect only real test samples;
# empty denominators are handled explicitly below.
TP = FP = FN = TN = 0
# Pair labels with predictions by position so the count does not depend on
# the index labels carried by test_label.
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

total = len(test_label)
# Each ratio falls back to 0.0 when its denominator is zero (e.g. the model
# never predicts the positive class) instead of raising ZeroDivisionError.
accuracy = (TP + TN) / total if total else 0.0
precision = TP / (TP + FP) if (TP + FP) else 0.0
recall = TP / (TP + FN) if (TP + FN) else 0.0
F1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' % (score, accuracy, precision, recall, F1))
python数据挖掘test5_第4张图片
  • 决策树算法
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

# Decision tree limited to at most 8 leaf nodes.
dtModel = DecisionTreeClassifier(max_leaf_nodes=8)
dtModel.fit(train_data, train_label)

# Mean score over 5-fold cross-validation on the training set.
# cross_val_score returns a NumPy array, so its own .mean() is used and no
# separate numpy import is required.
score = cross_val_score(dtModel, train_data, train_label, cv=5).mean()

result = dtModel.predict(test_data)

# Compute accuracy, precision, recall and F1 on the test set, treating
# label 1 as the positive class.
# Every counter starts at 0 so the metrics reflect only real test samples;
# empty denominators are handled explicitly below.
TP = FP = FN = TN = 0
# Pair labels with predictions by position so the count does not depend on
# the index labels carried by test_label.
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

total = len(test_label)
# Each ratio falls back to 0.0 when its denominator is zero (e.g. the model
# never predicts the positive class) instead of raising ZeroDivisionError.
accuracy = (TP + TN) / total if total else 0.0
precision = TP / (TP + FP) if (TP + FP) else 0.0
recall = TP / (TP + FN) if (TP + FN) else 0.0
F1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' % (score, accuracy, precision, recall, F1))
python数据挖掘test5_第5张图片
  • 随机森林
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

# Random forest of 8 trees with no cap on the number of leaf nodes.
rfcModel = RandomForestClassifier(n_estimators=8, max_leaf_nodes=None)
rfcModel.fit(train_data, train_label)

# Mean score over 5-fold cross-validation on the training set.
# cross_val_score returns a NumPy array, so its own .mean() is used and no
# separate numpy import is required.
score = cross_val_score(rfcModel, train_data, train_label, cv=5).mean()

result = rfcModel.predict(test_data)

# Compute accuracy, precision, recall and F1 on the test set, treating
# label 1 as the positive class.
# Every counter starts at 0 so the metrics reflect only real test samples;
# empty denominators are handled explicitly below.
TP = FP = FN = TN = 0
# Pair labels with predictions by position so the count does not depend on
# the index labels carried by test_label.
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

total = len(test_label)
# Each ratio falls back to 0.0 when its denominator is zero (e.g. the model
# never predicts the positive class) instead of raising ZeroDivisionError.
accuracy = (TP + TN) / total if total else 0.0
precision = TP / (TP + FP) if (TP + FP) else 0.0
recall = TP / (TP + FN) if (TP + FN) else 0.0
F1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' % (score, accuracy, precision, recall, F1))
python数据挖掘test5_第6张图片

你可能感兴趣的:(python数据挖掘test5)