DeepFM builds on FNN and PNN: it combines an FM layer and a deep layer in a parallel structure, which improves the model's computational efficiency (the two parts share the same input embeddings).
Main function: efficiently learn the weights of cross (interaction) features.
Model formula:

\hat{y} = \mathrm{sigmoid}(y_{FM} + y_{DNN})

The FM layer combines first-order and second-order feature interactions; its output is summed with the output of the deep part, and the sum is passed through a sigmoid to obtain the final prediction.
Advantages of the FM layer: it can learn reliable weights for second-order cross features even when the data is sparse, and its pairwise interaction term can be computed in linear time O(kn); see the formulation below.
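For reference, the standard FM formulation and the rewrite that brings the pairwise interaction term down to O(kn) time:

y_{FM} = w_0 + \sum_{i=1}^{n} w_i x_i + \sum_{i=1}^{n} \sum_{j=i+1}^{n} \langle v_i, v_j \rangle x_i x_j

\sum_{i=1}^{n} \sum_{j=i+1}^{n} \langle v_i, v_j \rangle x_i x_j = \frac{1}{2} \sum_{f=1}^{k} \left[ \left( \sum_{i=1}^{n} v_{i,f} x_i \right)^{2} - \sum_{i=1}^{n} v_{i,f}^{2} x_i^{2} \right]

DeepFM implementation based on torch_rechub: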
from torch_rechub.basic.layers import FM, MLP, LR, EmbeddingLayer
import torch

class DeepFM(torch.nn.Module):
    def __init__(self, deep_features, fm_features, mlp_params):
        """
        The deep part and the FM part consume two different feature sets,
        deep_features and fm_features; mlp_params holds the hyperparameters
        of the MLP (multi-layer perceptron).
        """
        super().__init__()
        self.deep_features = deep_features
        self.fm_features = fm_features
        self.deep_dims = sum([fea.embed_dim for fea in deep_features])
        self.fm_dims = sum([fea.embed_dim for fea in fm_features])
        # LR models the first-order feature interactions
        self.linear = LR(self.fm_dims)
        # FM models the second-order feature interactions
        self.fm = FM(reduce_sum=True)
        # Embedding layer shared by all features
        self.embedding = EmbeddingLayer(deep_features + fm_features)
        # MLP for the deep part
        self.mlp = MLP(self.deep_dims, **mlp_params)

    def forward(self, x):
        # Dense embeddings of the input features
        input_deep = self.embedding(x, self.deep_features, squeeze_dim=True)  # (batch_size, deep_dims)
        input_fm = self.embedding(x, self.fm_features, squeeze_dim=False)     # (batch_size, num_fields, embed_dim)
        y_linear = self.linear(input_fm.flatten(start_dim=1))
        y_fm = self.fm(input_fm)
        y_deep = self.mlp(input_deep)
        # The final prediction combines the first-order interactions,
        # the second-order interactions, and the deep component
        y = y_linear + y_fm + y_deep
        # Squash the score into the (0, 1) interval with a sigmoid
        return torch.sigmoid(y.squeeze(1))
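A minimal usage sketch (the feature names, vocabulary sizes, and MLP settings below are illustrative assumptions, not from the original notes; it assumes torch_rechub's DenseFeature/SparseFeature API):

from torch_rechub.basic.features import DenseFeature, SparseFeature

# Illustrative features; a real dataset would define its own
deep_features = [DenseFeature("age"),
                 SparseFeature("item_id", vocab_size=10000, embed_dim=16)]
fm_features = [SparseFeature("user_id", vocab_size=5000, embed_dim=16),
               SparseFeature("item_id", vocab_size=10000, embed_dim=16)]
model = DeepFM(deep_features, fm_features,
               mlp_params={"dims": [256, 128], "dropout": 0.2, "activation": "relu"})

# The input is a dict mapping feature names to batched tensors
x = {"age": torch.randn(8),
     "user_id": torch.randint(0, 5000, (8,)),
     "item_id": torch.randint(0, 10000, (8,))}
print(model(x).shape)  # torch.Size([8]) -- click probabilities in (0, 1)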
Activation Unit (the attention module of DIN):
Purpose: introduce an attention mechanism between the candidate ad and the user's historical behaviors, so that historical behaviors more relevant to the current candidate item contribute more to the predicted click.
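In the DIN paper, this attention-weighted sum pooling over the behavior sequence is written as:

v_U(A) = f(v_A, e_1, e_2, \ldots, e_H) = \sum_{j=1}^{H} a(e_j, v_A)\, e_j

where e_1, \ldots, e_H are the embeddings of the user's historical behaviors, v_A is the embedding of the candidate ad A, and a(\cdot, \cdot) is the small feed-forward network (the Activation Unit implemented below) that produces each attention weight.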
# Attention part: the Activation Unit
class ActivationUnit(torch.nn.Module):
    def __init__(self, emb_dim, dims=[36], activation="dice", use_softmax=False):
        super(ActivationUnit, self).__init__()
        self.emb_dim = emb_dim
        self.use_softmax = use_softmax
        # MLP with a Dice(36) hidden layer followed by a Linear(1) output layer
        self.attention = MLP(4 * self.emb_dim, dims=dims, activation=activation)

    def forward(self, history, target):
        # history: (batch_size, seq_length, emb_dim); target: (batch_size, emb_dim)
        seq_length = history.size(1)
        target = target.unsqueeze(1).expand(-1, seq_length, -1)
        # Concatenate target, history, and their difference and element-wise product
        att_input = torch.cat([target, history, target - history, target * history], dim=-1)
        # Dice(36) -> Linear(1): one attention score per history position
        att_weight = self.attention(att_input.view(-1, 4 * self.emb_dim))
        att_weight = att_weight.view(-1, seq_length)
        if self.use_softmax:
            att_weight = att_weight.softmax(dim=-1)
        # Weighted sum pooling over the sequence -> (batch_size, emb_dim)
        output = (att_weight.unsqueeze(-1) * history).sum(dim=1)
        return output
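A quick shape check for the ActivationUnit (batch size, sequence length, and embedding dimension are arbitrary illustrative values):

unit = ActivationUnit(emb_dim=8)
history = torch.randn(4, 10, 8)      # (batch_size, seq_length, emb_dim)
target = torch.randn(4, 8)           # (batch_size, emb_dim)
print(unit(history, target).shape)   # torch.Size([4, 8])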
# DIN implementation
class DIN(torch.nn.Module):
    def __init__(self, features, history_features, target_features, mlp_params, attention_mlp_params):
        super().__init__()
        self.features = features
        self.history_features = history_features
        self.target_features = target_features
        # Number of historical-behavior features
        self.num_history_features = len(history_features)
        # Total embedding dimension across all features
        self.all_dims = sum([fea.embed_dim for fea in features + history_features + target_features])
        # Embedding layer
        self.embedding = EmbeddingLayer(features + history_features + target_features)
        # One attention layer (Activation Unit) per historical-behavior feature
        self.attention_layers = torch.nn.ModuleList(
            [ActivationUnit(fea.embed_dim, **attention_mlp_params) for fea in self.history_features])
        self.mlp = MLP(self.all_dims, activation="dice", **mlp_params)

    def forward(self, x):
        embed_x_features = self.embedding(x, self.features)          # (batch_size, num_features, emb_dim)
        embed_x_history = self.embedding(x, self.history_features)   # (batch_size, num_history_features, seq_length, emb_dim)
        embed_x_target = self.embedding(x, self.target_features)     # (batch_size, num_target_features, emb_dim)
        attention_pooling = []
        for i in range(self.num_history_features):
            attention_seq = self.attention_layers[i](embed_x_history[:, i, :, :], embed_x_target[:, i, :])
            attention_pooling.append(attention_seq.unsqueeze(1))
        # Each attention layer already performed weighted sum pooling over its
        # sequence; stack the pooled vectors along the feature axis
        attention_pooling = torch.cat(attention_pooling, dim=1)  # (batch_size, num_history_features, emb_dim)
        # Concat & flatten everything as the MLP input
        mlp_in = torch.cat([
            attention_pooling.flatten(start_dim=1),
            embed_x_target.flatten(start_dim=1),
            embed_x_features.flatten(start_dim=1)
        ], dim=1)
        # e.g. mlp_params can be passed as {"dims": [80, 200]}
        y = self.mlp(mlp_in)
        # This code uses sigmoid(1) + BCELoss; the effect is similar to the
        # softmax(2) + cross-entropy loss used for DIN in the paper
        return torch.sigmoid(y.squeeze(1))
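A minimal instantiation sketch (feature names, vocabulary sizes, and history length are illustrative assumptions; it relies on torch_rechub's SparseFeature/SequenceFeature API, where the history sequence shares its embedding table with the target item via shared_with):

from torch_rechub.basic.features import SparseFeature, SequenceFeature

target_features = [SparseFeature("item_id", vocab_size=10000, embed_dim=8)]
history_features = [SequenceFeature("hist_item_id", vocab_size=10000, embed_dim=8,
                                    pooling="concat", shared_with="item_id")]
features = [SparseFeature("user_id", vocab_size=5000, embed_dim=8)]
model = DIN(features, history_features, target_features,
            mlp_params={"dims": [80, 200]},
            attention_mlp_params={"dims": [36], "activation": "dice"})

x = {"user_id": torch.randint(0, 5000, (4,)),
     "item_id": torch.randint(0, 10000, (4,)),
     "hist_item_id": torch.randint(0, 10000, (4, 20))}  # history length 20
pred = model(x)                                          # (4,) click probabilities
loss = torch.nn.BCELoss()(pred, torch.ones(4))           # the sigmoid(1)+BCELoss setup noted above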