二分类银行精准营销的单一分类算法尝试

# 加载教程中会用到的包
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

# 加载 warnings
import warnings

# Silence every warning for the remainder of the script.
warnings.filterwarnings("ignore")
# Read the training and test sets from their CSV files (train first, then test).
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('bank_train_set.csv', 'bank_test_set.csv')
)
# Show which matplotlib styles are installed, then switch to "ggplot".
print(plt.style.available)
plt.style.use('ggplot')
# Stack train on top of test so both receive the same preprocessing below.
# Row labels are kept as-is (no ignore_index), so they repeat across the two parts.
data = pd.concat((train, test), axis=0)

处理类别型变量(独热编码)

# One-hot encode the listed columns, then drop the originals together with 'ID'.
# The prefix already ends in '_' and get_dummies adds its own '_' separator,
# so the generated column names look like '<column>__<level>'.
categorical_cols = ['campaign', 'contact', 'default', 'education', 'housing',
                    'job', 'loan', 'marital', 'month', 'poutcome']
dummy_frames = [
    pd.get_dummies(data[col], prefix=col + '_') for col in categorical_cols
]
# Remaining original columns come first, followed by each dummy group in list order.
data = pd.concat(
    [data.drop(columns=categorical_cols + ['ID'])] + dummy_frames,
    axis=1,
)

取出有目标值的集合

# Rows with a non-null target 'y' form the training frame; rows where 'y'
# is null form the test frame.
has_target = data['y'].notna()
df_train = data[has_target]
df_test = data[~has_target]

knn 不同参数

# Sweep n_neighbors for KNN and compare accuracy on the training split with
# accuracy on the held-out split.
# NOTE(review): x_train, y_train, x_test and y_test are not defined in the
# visible part of this file; presumably they come from a train/test split of
# df_train -- confirm before running.
from sklearn.neighbors import KNeighborsClassifier  # was missing: NameError otherwise

neig = np.arange(1, 100)  # candidate K values: 1..99 inclusive
train_accuracy = []
test_accuracy = []
for k in neig:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(x_train, y_train)
    # Accuracy on the data the model was fitted on
    train_accuracy.append(knn.score(x_train, y_train))
    # Accuracy on the held-out data
    test_accuracy.append(knn.score(x_test, y_test))

# Plot both accuracy curves against K
plt.figure(figsize=[13, 8])
plt.plot(neig, test_accuracy, label='Testing Accuracy')
plt.plot(neig, train_accuracy, label='Training Accuracy')
plt.legend()
plt.title('K-value VS Accuracy')
plt.xlabel('Number of Neighbors')
plt.ylabel('Accuracy')
plt.xticks(neig)
# NOTE: every plotting section of this script saves to 'graph.png', so each
# run of a section overwrites the previous figure.
plt.savefig('graph.png')
plt.show()

# Look the best K up in the grid itself instead of assuming "index + 1",
# so the report stays correct if the grid no longer starts at 1.
best_idx = int(np.argmax(test_accuracy))
print("Best accuracy is {} with K = {}".format(np.max(test_accuracy), neig[best_idx]))

knn(5 折交叉验证)

# Sweep n_neighbors for KNN and score each value by mean 5-fold
# cross-validated accuracy.
# NOTE(review): x and y are not defined in the visible part of this file;
# presumably the feature matrix and target taken from df_train -- confirm.
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier  # was missing: NameError otherwise

neig = np.arange(1, 100)  # candidate K values: 1..99 inclusive
test_accuracy = []
for k in neig:
    knn = KNeighborsClassifier(n_neighbors=k)
    # cross_val_score splits the data into 5 folds and returns one accuracy per fold
    cv_result = cross_val_score(knn, x, y, cv=5, scoring='accuracy')
    # Average over the folds; mean() avoids repeating the fold count as a divisor
    test_accuracy.append(cv_result.mean())

# Plot cross-validated accuracy against K
plt.figure(figsize=[13, 8])
plt.plot(neig, test_accuracy, label='Testing Accuracy')
plt.legend()
plt.title('K-value VS Accuracy')
plt.xlabel('Number of Neighbors')
plt.ylabel('Accuracy')
plt.xticks(neig)
# NOTE: every plotting section of this script saves to 'graph.png', so each
# run of a section overwrites the previous figure.
plt.savefig('graph.png')
plt.show()

# Look the best K up in the grid itself instead of assuming "index + 1".
best_idx = int(np.argmax(test_accuracy))
print("Best accuracy is {} with K = {}".format(np.max(test_accuracy), neig[best_idx]))
# Sweep the number of trees (n_estimators) of a random forest and score each
# value by mean 5-fold cross-validated accuracy.
# NOTE(review): x and y are not defined in the visible part of this file;
# presumably the feature matrix and target taken from df_train -- confirm.
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

neig = np.arange(1, 100)  # candidate n_estimators values: 1..99 inclusive
test_accuracy = []
for n_trees in neig:
    # random_state is fixed so repeated runs build the same forests
    clf = RandomForestClassifier(n_estimators=n_trees, random_state=0)
    # cross_val_score splits the data into 5 folds and returns one accuracy per fold
    cv_result = cross_val_score(clf, x, y, cv=5, scoring='accuracy')
    # Average over the folds; mean() avoids repeating the fold count as a divisor
    test_accuracy.append(cv_result.mean())

# Plot cross-validated accuracy against the number of trees
plt.figure(figsize=[13, 8])
plt.plot(neig, test_accuracy, label='Testing Accuracy')
plt.legend()
# The swept parameter here is n_estimators, not K, so label it as such.
plt.title('n_estimators VS Accuracy')
plt.xlabel('Number of Trees')
plt.ylabel('Accuracy')
plt.xticks(neig)
# NOTE: every plotting section of this script saves to 'graph.png', so each
# run of a section overwrites the previous figure.
plt.savefig('graph.png')
plt.show()

# Look the best value up in the grid itself instead of assuming "index + 1".
best_idx = int(np.argmax(test_accuracy))
print("Best accuracy is {} with n_estimators = {}".format(np.max(test_accuracy), neig[best_idx]))
# Sweep max_depth of a 4-tree random forest and score each depth by mean
# 5-fold cross-validated accuracy.
# NOTE(review): x and y are not defined in the visible part of this file;
# presumably the feature matrix and target taken from df_train -- confirm.
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

neig = np.arange(1, 50)  # candidate max_depth values: 1..49 inclusive
test_accuracy = []
for depth in neig:
    # n_estimators stays at 4; random_state is fixed so runs are repeatable
    clf = RandomForestClassifier(n_estimators=4, max_depth=depth,
                                 random_state=0)
    # cross_val_score splits the data into 5 folds and returns one accuracy per fold
    cv_result = cross_val_score(clf, x, y, cv=5, scoring='accuracy')
    # Average over the folds; mean() avoids repeating the fold count as a divisor
    test_accuracy.append(cv_result.mean())

# Plot cross-validated accuracy against tree depth
plt.figure(figsize=[13, 8])
plt.plot(neig, test_accuracy, label='Testing Accuracy')
plt.legend()
# The swept parameter here is max_depth, not K, so label it as such.
plt.title('max_depth VS Accuracy')
plt.xlabel('max_depth')
plt.ylabel('Accuracy')
plt.xticks(neig)
# NOTE: every plotting section of this script saves to 'graph.png', so each
# run of a section overwrites the previous figure.
plt.savefig('graph.png')
plt.show()

# Look the best value up in the grid itself instead of assuming "index + 1".
best_idx = int(np.argmax(test_accuracy))
print("Best accuracy is {} with max_depth = {}".format(np.max(test_accuracy), neig[best_idx]))
Unknown-7.png

你可能感兴趣的:(二分类银行精准营销的单一分类算法尝试)