PyTorch实现银行卡忠诚度预测(手写)

#导入模块
from sklearn import preprocessing as ps
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch

# Load the pre-processed training table from disk and show it.
csv_path = r"D:\pylearn\应用\银行卡忠诚度预测\train_pre.csv"
data = pd.read_csv(csv_path)
print(data)

pytorch实现银行卡忠诚度预测(手写)_第1张图片

# Split features and labels: every column except the last is a feature,
# and the label is the "target" column as a NumPy array.
target = data["target"].to_numpy()
traindata = data.iloc[:, 0:-1]
# Drop the useless feature: print whether card_id is unique per row
# (a pure identifier carries no predictive signal), then remove it.
print(traindata["card_id"].is_unique)
# Non-mutating drop: dropping in place on a slice of `data` is the pattern
# pandas warns about (chained assignment on a possible copy).
traindata = traindata.drop(columns="card_id")
# Standardize the features; `ps.scale` is sklearn.preprocessing.scale.
traindata = ps.scale(traindata)

# Split into training and test sets.
import random

# Take the row count from the data instead of hard-coding it, so the split
# still works when the CSV has a different number of rows.
n_samples = traindata.shape[0]
# The test set is 30% of the rows. `random.sample` draws exactly that many
# distinct indices in one call (the earlier rejection loop with `<=` drew one
# index too many).
test_size = int(n_samples * 0.3)
list_test = random.sample(range(n_samples), test_size)
set_test = set(list_test)

x_test = traindata[list_test]
y_test = target[list_test]
# Every index that was not drawn for the test set goes to the training set.
n = [i for i in range(n_samples) if i not in set_test]
x_train = traindata[n]
y_train = target[n]

# Convert the four NumPy splits into float32 tensors.
x_train, y_train, x_test, y_test = (
    torch.tensor(array, dtype=torch.float32)
    for array in (x_train, y_train, x_test, y_test)
)

# Define the trainable parameters (weights w, biases b) of a three-layer MLP:
# n_features -> 84 -> 40 -> 1.
torch.manual_seed(100)  # fixed seed so the initial values are reproducible
# Read the input width from the data rather than hard-coding it; otherwise a
# feature matrix with any other column count fails in the first matmul.
n_features = x_train.shape[1]
# The creation order below is kept fixed so the seeded random stream assigns
# the same values to each parameter on every run.
w1 = torch.rand(n_features, 84, dtype=torch.float32, requires_grad=True)
b1 = torch.rand(84, dtype=torch.float32, requires_grad=True)
w2 = torch.rand(84, 40, dtype=torch.float32, requires_grad=True)
b2 = torch.rand(40, dtype=torch.float32, requires_grad=True)
w3 = torch.rand(40, 1, dtype=torch.float32, requires_grad=True)
b3 = torch.rand(1, dtype=torch.float32, requires_grad=True)
# Learning rate for the manual gradient-descent update.
lr = 0.0004

# Cut every tensor into consecutive batches of 1000 rows along dim 0
# (the final batch holds whatever rows remain).
batch_size = 1000
sx_train = x_train.split(batch_size, dim=0)
sy_train = y_train.split(batch_size, dim=0)
sx_test = x_test.split(batch_size, dim=0)
sy_test = y_test.split(batch_size, dim=0)

train_lossdata = []
test_lossdata = []


def _forward(batch):
    """Run the three-layer MLP on one batch and return 1-D predictions.

    The last layer yields shape (batch, 1); it is squeezed to (batch,) so it
    lines up element-wise with the 1-D targets. Without this, subtracting a
    (batch, 1) prediction from a (batch,) target broadcasts to
    (batch, batch) and the loss is averaged over every target/prediction
    pair instead of per sample.
    """
    func1 = torch.relu(batch.mm(w1) + b1)
    func2 = torch.relu(func1.mm(w2) + b2)
    return (func2.mm(w3) + b3).squeeze(1)


def _half_mse(expected, predicted):
    """Return the mean of 0.5 * squared error over one batch."""
    return torch.mean(0.5 * (expected - predicted) ** 2)


_params = (w1, b1, w2, b2, w3, b3)

# Run 10 epochs.
for epoch in range(10):
    # Accumulate plain floats so no autograd graph is kept alive across batches.
    train_losses = 0.0
    test_losses = 0.0
    # Train on the training set, one batch at a time.
    for indexs, (ks, ys) in enumerate(zip(sx_train, sy_train)):
        train_predict = _forward(ks)
        # Loss for this batch.
        train_loss = _half_mse(ys, train_predict)
        train_losses += train_loss.item()
        # Back-propagation.
        train_loss.backward()
        # Plain gradient-descent step, then reset the gradients. Done under
        # no_grad so the in-place updates are not recorded by autograd.
        with torch.no_grad():
            for param in _params:
                param.sub_(lr * param.grad)
                param.grad.zero_()

        if indexs % 10 == 0:
            # Print the training loss of every 10th batch.
            print(indexs, ":", train_loss.item())
    # Average over the number of batches (not the last index, which is one less).
    loss_trains = train_losses / len(sx_train)
    # Print the training loss of this epoch.
    print(epoch, ":*******train_loss:{:.4f}".format(loss_trains))
    train_lossdata.append(round(loss_trains, 4))
    # Validation: no gradients are needed, so skip graph construction.
    with torch.no_grad():
        for index, (k, ys) in enumerate(zip(sx_test, sy_test)):
            test_predict = _forward(k)
            test_loss = _half_mse(ys, test_predict)
            test_losses += test_loss.item()
    loss_tests = test_losses / len(sx_test)
    # Print the validation loss of this epoch.
    print(epoch, ":*******test_loss:{:.4f}".format(loss_tests))
    test_lossdata.append(round(loss_tests, 4))

训练10轮后,最后一轮的损失(0.5 × 均方误差):
在这里插入图片描述

# Training and validation loss lists.
# The author reports a first-epoch training loss of 57151.0035, which makes
# the plot unreadable; for display it is overwritten with the mean of the
# remaining epochs plus 1.
later_losses = train_lossdata[1:]
train_lossdata[0] = round(sum(later_losses) / len(later_losses) + 1, 4)
print(train_lossdata)
print(test_lossdata)

在这里插入图片描述

# Plot both loss curves against the epoch index (0-9).
epochs = range(10)
for losses, colour, name in (
    (train_lossdata, "r", "train"),
    (test_lossdata, "y", "test"),
):
    plt.plot(epochs, losses, c=colour, label=name, linewidth=5)
plt.legend()
plt.show()

pytorch实现银行卡忠诚度预测(手写)_第2张图片

你可能感兴趣的:(机器学习,pytorch,python)