内容:根据信用卡持卡人的背景信息(年龄、教育水平、当前工作年限、当前居住年限、家庭收入、债务占收入比例、信用卡负债、其他负债)预测其还款拖欠情况,并使用分类算法进行建模预测。
数据导入:
import numpy as np
import pandas as pd
def _split_features_label(frame):
    """Return (all columns except the last, the last column) of *frame*."""
    return frame.iloc[:, :-1], frame.iloc[:, -1]


# Training set: the last CSV column is used as the label, the rest as features.
data = pd.read_csv('C:\\Users\\dell\\Desktop\\datamining\\train__UnB.csv')
train_data, train_label = _split_features_label(data)

# Test set, split the same way.
data1 = pd.read_csv('C:\\Users\\dell\\Desktop\\datamining\\test__UnB.csv')
test_data, test_label = _split_features_label(data1)
数据源:
机器学习分类预测
# Show the distinct label values. A bare expression is only echoed in an
# interactive session, so print it explicitly to make the output visible
# when the file is run as a script.
print(train_label.unique())
从标签数值看出,这是一个二分类问题。
KNN算法
from sklearn.model_selection import cross_val_score
from sklearn import neighbors

# k-nearest-neighbours classifier with k=2, fitted on the full training set.
knnModel = neighbors.KNeighborsClassifier(n_neighbors=2)
knnModel.fit(train_data, train_label)

# Mean of the 5-fold cross-validation scores on the training data.
score = np.mean(cross_val_score(knnModel, train_data, train_label, cv=5))

result = knnModel.predict(test_data)

# Compute accuracy, precision, recall and F1, treating label 1 as positive.
# Labels and predictions are paired positionally so the counts do not depend
# on the index carried by test_label.
TP = FP = FN = TN = 0
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

accuracy = (TP + TN) / len(test_label)
# Report 0.0 instead of raising ZeroDivisionError when a ratio is undefined
# (for example when the model never predicts the positive class).
precision = TP / (TP + FP) if TP + FP else 0.0
recall = TP / (TP + FN) if TP + FN else 0.0
F1 = (
    2 * precision * recall / (precision + recall)
    if precision + recall
    else 0.0
)
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))
朴素贝叶斯
from sklearn.naive_bayes import MultinomialNB

# Multinomial naive Bayes with default parameters, fitted on the training set.
# NOTE(review): MultinomialNB rejects negative feature values -- confirm that
# every feature column is non-negative.
MNBModel = MultinomialNB()
MNBModel.fit(train_data, train_label)

# Mean of the 5-fold cross-validation scores on the training data.
score = np.mean(cross_val_score(MNBModel, train_data, train_label, cv=5))

result = MNBModel.predict(test_data)

# Compute accuracy, precision, recall and F1, treating label 1 as positive.
# Labels and predictions are paired positionally so the counts do not depend
# on the index carried by test_label.
TP = FP = FN = TN = 0
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

accuracy = (TP + TN) / len(test_label)
# Report 0.0 instead of raising ZeroDivisionError when a ratio is undefined
# (for example when the model never predicts the positive class).
precision = TP / (TP + FP) if TP + FP else 0.0
recall = TP / (TP + FN) if TP + FN else 0.0
F1 = (
    2 * precision * recall / (precision + recall)
    if precision + recall
    else 0.0
)
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))
SVM
from sklearn import svm

# Support-vector classifier with an RBF kernel, fitted on the training set.
svcModel = svm.SVC(kernel='rbf')
svcModel.fit(train_data, train_label)

# Mean of the 5-fold cross-validation scores on the training data.
score = np.mean(cross_val_score(svcModel, train_data, train_label, cv=5))

result = svcModel.predict(test_data)

# Compute accuracy, precision, recall and F1, treating label 1 as positive.
# The counters start at 0: starting them at 1 adds a phantom sample to every
# confusion-matrix cell and distorts all four metrics. Labels and predictions
# are paired positionally so the counts do not depend on test_label's index.
TP = FP = FN = TN = 0
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

accuracy = (TP + TN) / len(test_label)
# Report 0.0 instead of raising ZeroDivisionError when a ratio is undefined
# (for example when the model never predicts the positive class).
precision = TP / (TP + FP) if TP + FP else 0.0
recall = TP / (TP + FN) if TP + FN else 0.0
F1 = (
    2 * precision * recall / (precision + recall)
    if precision + recall
    else 0.0
)
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))
决策树算法
from sklearn.tree import DecisionTreeClassifier

# Decision tree limited to at most 8 leaf nodes, fitted on the training set.
dtModel = DecisionTreeClassifier(max_leaf_nodes=8)
dtModel.fit(train_data, train_label)

# Mean of the 5-fold cross-validation scores on the training data.
score = np.mean(cross_val_score(dtModel, train_data, train_label, cv=5))

result = dtModel.predict(test_data)

# Compute accuracy, precision, recall and F1, treating label 1 as positive.
# The counters start at 0: starting them at 1 adds a phantom sample to every
# confusion-matrix cell and distorts all four metrics. Labels and predictions
# are paired positionally so the counts do not depend on test_label's index.
TP = FP = FN = TN = 0
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

accuracy = (TP + TN) / len(test_label)
# Report 0.0 instead of raising ZeroDivisionError when a ratio is undefined
# (for example when the model never predicts the positive class).
precision = TP / (TP + FP) if TP + FP else 0.0
recall = TP / (TP + FN) if TP + FN else 0.0
F1 = (
    2 * precision * recall / (precision + recall)
    if precision + recall
    else 0.0
)
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))
随机森林
from sklearn.ensemble import RandomForestClassifier

# Random forest of 8 trees with no cap on leaf nodes, fitted on the training
# set. No random_state is set, so results can differ between runs.
rfcModel = RandomForestClassifier(n_estimators=8, max_leaf_nodes=None)
rfcModel.fit(train_data, train_label)

# Mean of the 5-fold cross-validation scores on the training data.
score = np.mean(cross_val_score(rfcModel, train_data, train_label, cv=5))

result = rfcModel.predict(test_data)

# Compute accuracy, precision, recall and F1, treating label 1 as positive.
# The counters start at 0: starting them at 1 adds a phantom sample to every
# confusion-matrix cell and distorts all four metrics. Labels and predictions
# are paired positionally so the counts do not depend on test_label's index.
TP = FP = FN = TN = 0
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

accuracy = (TP + TN) / len(test_label)
# Report 0.0 instead of raising ZeroDivisionError when a ratio is undefined
# (for example when the model never predicts the positive class).
precision = TP / (TP + FP) if TP + FP else 0.0
recall = TP / (TP + FN) if TP + FN else 0.0
F1 = (
    2 * precision * recall / (precision + recall)
    if precision + recall
    else 0.0
)
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))