python与数据挖掘 上机实验5_python数据挖掘test5

内容:根据信用卡持卡人的背景信息(年龄、教育水平、当前工作年限、当前居住年限、家庭收入、债务占收入比例、信用卡负债、其他负债)预测还款拖欠情况,并使用多种分类算法建模预测。

数据导入:

import pandas as pd

# Training set: every column except the last is a feature,
# the last column is the label.
data = pd.read_csv('C:\\Users\\dell\\Desktop\\datamining\\train__UnB.csv')
train_data, train_label = data.iloc[:, :-1], data.iloc[:, -1]

# Test set, split into features and label the same way.
data1 = pd.read_csv('C:\\Users\\dell\\Desktop\\datamining\\test__UnB.csv')
test_data, test_label = data1.iloc[:, :-1], data1.iloc[:, -1]

数据源:

机器学习分类预测

# Show the distinct values that occur in the training label column.
train_label.unique()

从标签数值看出,这是一个二分类问题。

KNN算法

from sklearn.model_selection import cross_val_score
from sklearn import neighbors

# K-nearest-neighbours classifier that votes among the 2 closest samples.
knnModel = neighbors.KNeighborsClassifier(n_neighbors=2)
knnModel.fit(train_data, train_label)

# Mean of the 5-fold cross-validation scores on the training set.
# ndarray.mean() is used because the script never imports numpy as `np`.
score = cross_val_score(knnModel, train_data, train_label, cv=5).mean()

result = knnModel.predict(test_data)

## Compute accuracy, precision, recall and F1 (label 1 is the positive class)
TP = FP = FN = TN = 0
# Pair labels and predictions by position so the counts do not depend on
# the index labels of test_label.
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

accuracy = (TP + TN) / len(test_label)
# Guard the ratios: with no predicted (or no actual) positives the
# denominator is zero; report 0.0 instead of raising ZeroDivisionError.
precision = TP / (TP + FP) if TP + FP else 0.0
recall = TP / (TP + FN) if TP + FN else 0.0
F1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0

print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))

朴素贝叶斯

from sklearn.naive_bayes import MultinomialNB

# Multinomial naive Bayes classifier with default settings.
# NOTE(review): MultinomialNB expects non-negative feature values --
# confirm that every column of train_data satisfies this.
MNBModel = MultinomialNB()
MNBModel.fit(train_data, train_label)

# Mean of the 5-fold cross-validation scores on the training set.
# ndarray.mean() is used because the script never imports numpy as `np`.
score = cross_val_score(MNBModel, train_data, train_label, cv=5).mean()

result = MNBModel.predict(test_data)

## Compute accuracy, precision, recall and F1 (label 1 is the positive class)
TP = FP = FN = TN = 0
# Pair labels and predictions by position so the counts do not depend on
# the index labels of test_label.
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

accuracy = (TP + TN) / len(test_label)
# Guard the ratios: with no predicted (or no actual) positives the
# denominator is zero; report 0.0 instead of raising ZeroDivisionError.
precision = TP / (TP + FP) if TP + FP else 0.0
recall = TP / (TP + FN) if TP + FN else 0.0
F1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0

print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))

SVM

from sklearn import svm

# Support-vector classifier with an RBF kernel.
svcModel = svm.SVC(kernel='rbf')
svcModel.fit(train_data, train_label)

# Mean of the 5-fold cross-validation scores on the training set.
# ndarray.mean() is used because the script never imports numpy as `np`.
score = cross_val_score(svcModel, train_data, train_label, cv=5).mean()

result = svcModel.predict(test_data)

## Compute accuracy, precision, recall and F1 (label 1 is the positive class)
# The counters start at 0. Starting them at 1 would add four phantom samples
# and inflate every metric (accuracy could even exceed 1).
TP = FP = FN = TN = 0
# Pair labels and predictions by position so the counts do not depend on
# the index labels of test_label.
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

accuracy = (TP + TN) / len(test_label)
# Guard the ratios: with no predicted (or no actual) positives the
# denominator is zero; report 0.0 instead of raising ZeroDivisionError.
precision = TP / (TP + FP) if TP + FP else 0.0
recall = TP / (TP + FN) if TP + FN else 0.0
F1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0

print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))

决策树算法

from sklearn.tree import DecisionTreeClassifier

# Decision tree limited to at most 8 leaf nodes.
dtModel = DecisionTreeClassifier(max_leaf_nodes=8)
dtModel.fit(train_data, train_label)

# Mean of the 5-fold cross-validation scores on the training set.
# ndarray.mean() is used because the script never imports numpy as `np`.
score = cross_val_score(dtModel, train_data, train_label, cv=5).mean()

result = dtModel.predict(test_data)

## Compute accuracy, precision, recall and F1 (label 1 is the positive class)
# The counters start at 0. Starting them at 1 would add four phantom samples
# and inflate every metric (accuracy could even exceed 1).
TP = FP = FN = TN = 0
# Pair labels and predictions by position so the counts do not depend on
# the index labels of test_label.
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

accuracy = (TP + TN) / len(test_label)
# Guard the ratios: with no predicted (or no actual) positives the
# denominator is zero; report 0.0 instead of raising ZeroDivisionError.
precision = TP / (TP + FP) if TP + FP else 0.0
recall = TP / (TP + FN) if TP + FN else 0.0
F1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0

print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))

随机森林

from sklearn.ensemble import RandomForestClassifier

# Random forest of 8 trees with no limit on the number of leaf nodes.
# NOTE(review): no random_state is passed, so results presumably differ
# from run to run -- set one if reproducible numbers are needed.
rfcModel = RandomForestClassifier(n_estimators=8, max_leaf_nodes=None)
rfcModel.fit(train_data, train_label)

# Mean of the 5-fold cross-validation scores on the training set.
# ndarray.mean() is used because the script never imports numpy as `np`.
score = cross_val_score(rfcModel, train_data, train_label, cv=5).mean()

result = rfcModel.predict(test_data)

## Compute accuracy, precision, recall and F1 (label 1 is the positive class)
# The counters start at 0. Starting them at 1 would add four phantom samples
# and inflate every metric (accuracy could even exceed 1).
TP = FP = FN = TN = 0
# Pair labels and predictions by position so the counts do not depend on
# the index labels of test_label.
for actual, predicted in zip(test_label, result):
    if actual == 1 and predicted == 1:
        TP += 1
    elif actual == 1 and predicted == 0:
        FN += 1
    elif actual == 0 and predicted == 1:
        FP += 1
    elif actual == 0 and predicted == 0:
        TN += 1

accuracy = (TP + TN) / len(test_label)
# Guard the ratios: with no predicted (or no actual) positives the
# denominator is zero; report 0.0 instead of raising ZeroDivisionError.
precision = TP / (TP + FP) if TP + FP else 0.0
recall = TP / (TP + FN) if TP + FN else 0.0
F1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0

print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))

你可能感兴趣的:(python与数据挖掘,上机实验5)