PyTorch 实现逻辑回归

因为项目的原因，需要使用逻辑回归进行 CTR 预估：模型权重要能够实时更新，并且要能用为每个用户提前计算好的权重来初始化模型（即自定义模型权重参数）。在 sklearn 中没有找到这个功能，所以自己用 PyTorch 写一个，顺便练习一下神经网络的写法。

步骤

  • 定义模型
  • 前向求损失
  • 反向求梯度
  • 利用梯度更新模型权重

代码:

import torch
from torch import nn
from torch.autograd import Variable
import torch.utils.data as Data
from torch.nn import init
import numpy as np
import pandas as pd


class LogisticRegression(nn.Module):
    '''
    Binary logistic regression: one linear layer followed by a sigmoid.

    Initial values can be supplied either as a dict passed as the first
    extra positional argument, e.g.
    ``{'weight': [[w1, w2, ...]], 'bias': [b]}``, or as ``weight=`` /
    ``bias=`` keyword arguments.  They are converted to the dtype of the
    linear layer, so integer values are accepted as well.

    NOTE: the constructor only *stores* the supplied values (as
    ``self.weight`` and ``self.bias``).  The linear layer itself starts from
    a random initialisation; call ``init_weight(use_former_weight=True)`` to
    load the stored values into it.
    '''

    def __init__(self, n_features, *args, **kwargs):
        '''
        Args:
            n_features: number of input features (the layer has 1 output).
            *args: optional; ``args[0]`` is a dict with the keys
                ``'weight'`` and ``'bias'`` holding the initial values.
            **kwargs: used as the init dict when no positional dict is given.
        '''
        super().__init__()
        # n_features inputs, 1 output
        self.linear = nn.Linear(n_features, 1)
        self.sigmoid = nn.Sigmoid()

        # The init dict is optional: fall back to the keyword arguments
        # instead of failing with an IndexError on ``args[0]``.
        init_dict = (args[0] if args else None) or kwargs
        weight = init_dict.get('weight')
        bias = init_dict.get('bias')

        # Use the layer's own dtype so the stored values can be loaded into
        # the layer later without a dtype mismatch.
        dtype = self.linear.weight.dtype
        # A custom weight can only be assigned to the layer as nn.Parameter.
        self.weight = (
            None if weight is None
            else nn.Parameter(torch.tensor(weight, dtype=dtype))
        )
        self.bias = None if bias is None else torch.tensor(bias, dtype=dtype)
        self.init_weight()

    def init_weight(self, use_former_weight=False):
        '''
        (Re-)initialise the linear layer.

        Args:
            use_former_weight: if False, draw the weight from a standard
                normal distribution and set the bias to 0; if True, load the
                values that were passed to the constructor.

        Raises:
            ValueError: if ``use_former_weight`` is True but no initial
                weight/bias was supplied to the constructor.
        '''
        if not use_former_weight:
            init.normal_(self.linear.weight)
            init.constant_(self.linear.bias, val=0)
            return

        if self.weight is None or self.bias is None:
            raise ValueError(
                "use_former_weight=True requires both 'weight' and 'bias' "
                "to be passed to the constructor"
            )
        self._init_weight(self.weight, self.bias)

    def _init_weight(self, feature_weight, bias):
        '''
        Install the given weight and bias in the linear layer.

        ``feature_weight`` must be an ``nn.Parameter``.  It is assigned as-is,
        so afterwards the layer's weight is the very same object: training
        the layer also changes ``feature_weight``.
        '''
        self.linear.weight = feature_weight
        self.linear.bias.data = bias

    def forward(self, x):
        '''
        Return ``sigmoid(linear(x))``.

        Triggered by ``model(x)`` through ``nn.Module.__call__``.
        '''
        return self.sigmoid(self.linear(x))


class MuiltClassifier(LogisticRegression):
    '''
    Logistic-regression classifier with a scikit-learn style ``fit``.

    The model supports online learning (incremental updates) through
    ``fit(..., partial_fit=True)`` and can be initialised from existing
    weights (see the base class).
    '''

    def __init__(self, n_features, *args, **kwargs):
        super().__init__(n_features, *args, **kwargs)

    def check_tensor(self, obj) -> torch.Tensor:
        '''
        Convert training/test data to a tensor of the layer's dtype.

        DataFrames, Series, ndarrays and lists are converted; tensors are
        cast.  pandas/numpy data is float64 by default and integer labels
        are int64, neither of which matches the layer, hence the explicit
        cast.  Objects of any other type are returned unchanged.
        '''
        dtype = self.linear.weight.dtype
        if isinstance(obj, (pd.DataFrame, pd.Series)):
            obj = obj.to_numpy()
        if isinstance(obj, torch.Tensor):
            return obj.to(dtype)
        if isinstance(obj, (np.ndarray, list)):
            return torch.tensor(obj, dtype=dtype)

        return obj

    def show_train_info(self, y_hat, y):
        '''
        Print the accuracy of the predictions ``y_hat`` against labels ``y``.

        A sample counts as correct when label and prediction lie on the same
        side of 0.5; a value of exactly 0.5 never counts as correct.
        '''
        # Flatten both so an (n, 1) prediction and an (n,) label compare
        # element-wise instead of broadcasting to (n, n).
        predicted = y_hat.detach().reshape(-1)
        labels = y.detach().reshape(-1)
        both_positive = (labels > 0.5) & (predicted > 0.5)
        both_negative = (labels < 0.5) & (predicted < 0.5)
        right_num = int((both_positive | both_negative).sum())
        print(f'这一轮迭代模型的准确率为: {right_num / y.shape[0]}')

    def fit(self, X_train, y_train, max_iter,
            learning_rate, partial_fit=False, show_detail=True):
        '''
        Train the model with full-batch gradient descent.

        Args:
            X_train: features; DataFrame, ndarray, list or tensor.
            y_train: 0/1 labels, one per row of ``X_train``; same container
                types as ``X_train`` (or a Series).
            max_iter: number of gradient steps.
            learning_rate: SGD learning rate.
            partial_fit: if True, do a single incremental step regardless of
                ``max_iter``.
            show_detail: if True, print the accuracy before every step.

        Returns:
            self, to allow call chaining.
        '''
        # Prepare the training data
        X_train = self.check_tensor(X_train)
        y_train = self.check_tensor(y_train)
        # The model outputs shape (n, 1); give 1-D labels the same shape so
        # that the loss sees matching sizes.
        if isinstance(y_train, torch.Tensor) and y_train.dim() == 1:
            y_train = y_train.unsqueeze(1)

        # Mean binary cross-entropy; reduction='mean' is the non-deprecated
        # spelling of size_average=True.
        criterion = torch.nn.BCELoss(reduction='mean')
        optimizer = torch.optim.SGD(self.parameters(), lr=learning_rate)

        # Incremental learning performs exactly one update
        if partial_fit:
            max_iter = 1

        for _ in range(max_iter):
            y_hat = self(X_train)

            # Report the accuracy of this iteration
            if show_detail:
                self.show_train_info(y_hat, y_train)

            loss = criterion(y_hat, y_train)  # forward: compute the loss
            optimizer.zero_grad()  # reset the gradients
            loss.backward()  # backward: compute the gradients
            optimizer.step()  # update the weights

        return self


if __name__ == "__main__":
    # Demo: train on a toy data set, load the pre-computed weights, then do
    # one incremental update.  Plain tensors are used directly; wrapping
    # them in the deprecated ``Variable`` is not needed.
    X = torch.tensor([
        [0.6, 0.8, 1.1],
        [1.0, 1.5, 0.9],
        [3.5, 5.5, 2.0],
        [4.0, 3.4, 4.4],
    ])
    y = torch.tensor([[0.], [0.], [1.], [1.]])
    model = MuiltClassifier(3, {'weight': [[3.0, 3.0, 4.0]], 'bias': [0.0]})
    model = model.fit(X, y, max_iter=10, learning_rate=0.03)
    # model.weight holds the values passed to the constructor; the layer
    # trained above is model.linear, which started from random weights.
    print(model.weight)
    # Load the stored weights into the linear layer.  Calling
    # init_weight(use_former_weight=False) instead would re-draw random ones.
    model.init_weight(use_former_weight=True)
    print(model.weight)

    batch_data_x = torch.tensor([
        [0.2, 0.3, 0.1],
    ])
    batch_data_y = torch.tensor([[0.]])
    # One incremental step; model.weight is now the layer's own weight, so
    # the update is visible in the print below.
    model.fit(batch_data_x, batch_data_y, max_iter=1,
              partial_fit=True, learning_rate=0.2)
    print(model.weight)

你可能感兴趣的:(机器学习,推荐系统,深度学习)