因为项目的原因,需要使用逻辑回归进行 CTR 预估:要求权重可以实时更新,并且能用提前计算好的权重初始化每个用户的模型(即自定义模型权重参数)。在 sklearn 中没有找到这个功能,所以自己实现一个,顺便练习一下神经网络的写法。
步骤:定义逻辑回归模型(线性层 + Sigmoid)→ 支持用已有权重初始化模型 → 用 SGD + BCE 损失训练,并支持单步增量更新。
代码:
import torch
from torch import nn
from torch.autograd import Variable
import torch.utils.data as Data
from torch.nn import init
import numpy as np
import pandas as pd
class LogisticRegression(nn.Module):
    """Logistic-regression model: one linear layer followed by a sigmoid.

    Optionally accepts a dict of pre-computed weights as the first
    positional argument after ``n_features``::

        {'weight': [[w1, w2, ...]], 'bias': [b]}

    Element values must be floats. The stored weights are NOT applied at
    construction time — the layer is randomly initialised first; call
    ``init_weight(use_former_weight=True)`` to load the pre-computed
    weights into the linear layer.
    """

    def __init__(self, n_features, *args, **kwargs):
        super(LogisticRegression, self).__init__()
        # n_features inputs -> 1 output
        self.linear = nn.Linear(n_features, 1)
        self.sigmoid = nn.Sigmoid()
        # Fix: the original unconditionally read args[0] and crashed with
        # IndexError when no init dict was supplied. The dict is optional now.
        init_dict = args[0] if args else kwargs.get('init_dict', {})
        weight = init_dict.get('weight')
        bias = init_dict.get('bias')
        if weight is not None:
            # Must be an nn.Parameter: once assigned to self.linear.weight
            # (see _init_weight) the two names alias the same Parameter,
            # so subsequent training keeps self.weight in sync.
            # Fix: force float32 so it matches nn.Linear's parameter dtype.
            self.weight = nn.Parameter(torch.tensor(weight, dtype=torch.float32))
        if bias is not None:
            self.bias = torch.tensor(bias, dtype=torch.float32)
        self.init_weight()

    def init_weight(self, use_former_weight=False):
        """(Re-)initialise the linear layer.

        use_former_weight=False: random normal weights, zero bias.
        use_former_weight=True: load the pre-computed weights supplied
        at construction time (raises ValueError if none were given).
        """
        if not use_former_weight:
            # Fix: init.normal is deprecated; use the in-place variant.
            init.normal_(self.linear.weight)
            init.constant_(self.linear.bias, val=0)
        else:
            if not hasattr(self, 'weight') or not hasattr(self, 'bias'):
                raise ValueError('no pre-computed weight/bias was supplied '
                                 'at construction time')
            self._init_weight(self.weight, self.bias)

    def _init_weight(self, feature_weight, bias):
        # Assigning the Parameter itself (not .data) aliases it with
        # linear.weight; the bias is copied in place into the existing
        # bias Parameter.
        self.linear.weight = feature_weight
        self.linear.bias.data = bias

    def forward(self, x):
        """Forward pass; triggered by ``model(x)`` through
        ``nn.Module.__call__``. Returns sigmoid(linear(x))."""
        y = self.linear(x)
        y_pred = self.sigmoid(y)
        return y_pred
class MuiltClassifier(LogisticRegression):
    """Logistic-regression classifier that supports online (incremental)
    learning and initialisation from pre-computed weights."""

    def __init__(self, n_features, *args, **kwargs):
        super(MuiltClassifier, self).__init__(n_features, *args, **kwargs)

    def check_tensor(self, obj) -> torch.Tensor:
        """Coerce DataFrame / ndarray / list input to a float32 tensor;
        anything else (e.g. an existing tensor) passes through unchanged."""
        if isinstance(obj, pd.DataFrame):
            # Fix: force float32 so inputs match the model's parameter
            # dtype (float64 frames/arrays would otherwise raise in forward).
            return torch.tensor(obj.to_numpy(), dtype=torch.float32)
        if isinstance(obj, (np.ndarray, list)):
            return torch.tensor(obj, dtype=torch.float32)
        return obj

    def show_train_info(self, y_hat, y):
        """Print this iteration's accuracy using a 0.5 decision threshold."""
        np_y_hat = y_hat.detach().numpy()
        np_y = y.detach().numpy()
        right_num = 0
        for index, label in enumerate(np_y):
            if label > 0.5 and np_y_hat[index] > 0.5:
                right_num += 1
            if label < 0.5 and np_y_hat[index] < 0.5:
                right_num += 1
        print(f'这一轮迭代模型的准确率为: {right_num / np_y.shape[0]}')

    def fit(self, X_train, y_train, max_iter,
            learning_rate, partial_fit=False, show_detail=True):
        """Train the model with SGD on mean BCE loss.

        ``partial_fit=True`` performs a single update step (online /
        incremental learning), overriding ``max_iter``.
        Returns ``self`` for chaining.
        """
        X_train = self.check_tensor(X_train)
        y_train = self.check_tensor(y_train)
        # Fix: size_average is deprecated; the default reduction ('mean')
        # is equivalent to size_average=True.
        criterion = torch.nn.BCELoss()
        optimizer = torch.optim.SGD(self.parameters(), lr=learning_rate)
        # Incremental learning: exactly one gradient step.
        if partial_fit:
            max_iter = 1
        for _ in range(max_iter):
            y_hat = self(X_train)
            # Report per-iteration accuracy.
            if show_detail:
                self.show_train_info(y_hat, y_train)
            loss = criterion(y_hat, y_train)  # forward: compute loss
            optimizer.zero_grad()             # clear stale gradients
            loss.backward()                   # backprop gradients
            optimizer.step()                  # update weights
        return self
if __name__ == "__main__":
    # Demo: train on four samples, restore the pre-computed weights,
    # then perform one online (incremental) update on a new mini-batch.
    # Fix: torch.autograd.Variable is deprecated (a no-op since PyTorch
    # 0.4); plain tensors are used directly.
    X = torch.tensor([
        [0.6, 0.8, 1.1],
        [1.0, 1.5, 0.9],
        [3.5, 5.5, 2.0],
        [4.0, 3.4, 4.4],
    ])
    y = torch.tensor([[0.], [0.], [1.], [1.]])
    model = MuiltClassifier(3, {'weight': [[3.0, 3.0, 4.0]], 'bias': [0.0]})
    model = model.fit(X, y, max_iter=10, learning_rate=0.03)
    print(model.weight)
    # Reload the pre-computed weights supplied at construction time.
    model.init_weight(use_former_weight=True)
    print(model.weight)
    # One incremental step on a fresh mini-batch.
    batch_data_x = torch.tensor([[0.2, 0.3, 0.1]])
    batch_data_y = torch.tensor([[0.]])
    model.fit(batch_data_x, batch_data_y, max_iter=1,
              partial_fit=True, learning_rate=0.2)
    print(model.weight)