# -*- coding: utf-8 -*-
"""
@author: taoshouzheng
@time: 2019/8/8 15:19
@email: [email protected]
"""

import torch
import torch.nn as nn
from torch.nn import Parameter
from torch.nn import init
from torch import Tensor
from torch import optim
import math
import numpy as np
import random


# First, define a single cell of the Contextual LSTM
class ContextualLSTMCell(nn.Module):
    """Basic Contextual LSTM cell"""

    def __init__(self, input_size, hidden_size, contextual_type, bias=True):
        super(ContextualLSTMCell, self).__init__()

        # dimensionality of the input
        self.input_size = input_size
        # dimensionality of the hidden state
        self.hidden_size = hidden_size
        # number of context types (dimension of the one-hot context vector)
        self.contextual_type = contextual_type
        # whether to use bias terms
        self.bias = bias

        # input gate parameters
        self.w_ii = Parameter(Tensor(hidden_size, input_size))
        self.w_hi = Parameter(Tensor(hidden_size, hidden_size))
        self.w_ci = Parameter(Tensor(hidden_size, hidden_size))
        self.w_bi = Parameter(Tensor(hidden_size, contextual_type))
        self.bias_i = Parameter(Tensor(hidden_size, 1))

        # forget gate parameters
        self.w_if = Parameter(Tensor(hidden_size, input_size))
        self.w_hf = Parameter(Tensor(hidden_size, hidden_size))
        self.w_cf = Parameter(Tensor(hidden_size, hidden_size))
        self.w_bf = Parameter(Tensor(hidden_size, contextual_type))
        self.bias_f = Parameter(Tensor(hidden_size, 1))

        # cell memory parameters
        self.w_ic = Parameter(Tensor(hidden_size, input_size))
        self.w_hc = Parameter(Tensor(hidden_size, hidden_size))
        self.w_bc = Parameter(Tensor(hidden_size, contextual_type))
        self.bias_c = Parameter(Tensor(hidden_size, 1))

        # output gate parameters
        self.w_io = Parameter(Tensor(hidden_size, input_size))
        self.w_ho = Parameter(Tensor(hidden_size, hidden_size))
        self.w_co = Parameter(Tensor(hidden_size, hidden_size))
        self.w_bo = Parameter(Tensor(hidden_size, contextual_type))
        self.bias_o = Parameter(Tensor(hidden_size, 1))

        self.reset_parameters()

    # parameter initialization: uniform in [-1/sqrt(hidden_size), 1/sqrt(hidden_size)],
    # the same strategy PyTorch uses for its built-in recurrent layers
    def reset_parameters(self):
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for parameter in self.parameters():
            init.uniform_(parameter, -stdv, stdv)

    # forward pass for a single time step
    def forward(self, x, h, c, b):
        """
        All arguments are column-major, with shape (feature_dim, batch_size).
        :param x: input at the current time step
        :param h: hidden state from the previous time step
        :param c: memory cell from the previous time step
        :param b: one-hot context of the current input
        :return: new hidden state and new memory cell
        """
        # input gate (with a peephole connection to the previous cell state)
        ci = torch.sigmoid(self.w_ii @ x + self.w_hi @ h + self.w_ci @ c + self.w_bi @ b + self.bias_i)
        # forget gate
        cf = torch.sigmoid(self.w_if @ x + self.w_hf @ h + self.w_cf @ c + self.w_bf @ b + self.bias_f)
        # new cell memory
        cc = cf * c + ci * torch.tanh(self.w_ic @ x + self.w_hc @ h + self.w_bc @ b + self.bias_c)
        # output gate; its peephole connection looks at the updated cell state,
        # per the standard peephole LSTM formulation
        co = torch.sigmoid(self.w_io @ x + self.w_ho @ h + self.w_co @ cc + self.w_bo @ b + self.bias_o)
        # hidden state
        ch = co * torch.tanh(cc)
        return ch, cc

    # initialize the hidden state h and the memory cell c
    def init_state(self, batch_size, hidden_size):
        # random initial states, as in the original script
        # (zero initialization is the more common choice)
        h_init = torch.rand(hidden_size, batch_size)
        c_init = torch.rand(hidden_size, batch_size)
        return h_init, c_init


# the complete Contextual LSTM model
class ContextualLSTM(nn.Module):
    """Contextual LSTM model"""

    def __init__(self, num_steps, num_layers, input_size, hidden_size, contextual_type, bias=True):
        super(ContextualLSTM, self).__init__()

        # sequence length
        self.num_steps = num_steps
        # number of stacked layers
        self.num_layers = num_layers
        # input dimensionality
        self.input_size = input_size
        # hidden state dimensionality
        self.hidden_size = hidden_size
        # number of context types
        self.contextual_type = contextual_type
        # whether to use bias terms
        self.bias = bias

        # list of the Contextual LSTM cells of all layers; the first layer reads
        # the raw input, while each higher layer reads the hidden state of the
        # layer below, so its input size is hidden_size
        self._all_layers = []
        for k in range(self.num_layers):
            layer_name = 'cell{}'.format(k)
            cell_input_size = self.input_size if k == 0 else self.hidden_size
            cell = ContextualLSTMCell(cell_input_size, self.hidden_size, self.contextual_type, self.bias)
            setattr(self, layer_name, cell)
            self._all_layers.append(cell)
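
    # Note on data layout: every cell works on column-major tensors, i.e. each
    # state has shape (feature_dim, batch_size), so one column holds one sample.
    # forward() therefore transposes the (batch_size, feature_dim) slices read
    # from `inputs` and `contexts` before handing them to the cells; this is
    # what makes products such as w_ii @ x well-defined, with w_ii of shape
    # (hidden_size, input_size) and x of shape (input_size, batch_size).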

    # forward pass of the Contextual LSTM model
    def forward(self, inputs, contexts):
        """
        :param inputs: tensor of shape (num_steps, batch_size, input_size)
        :param contexts: tensor of shape (num_steps, batch_size, contextual_type)
        """
        # list holding the internal (h, c) state of every layer
        internal_state = []
        # hidden states emitted by the topmost layer, one per time step
        outputs = []
        # horizontal direction: iterate over all time steps
        for step in range(self.num_steps):
            # transpose to the cells' column-major (feature_dim, batch_size) layout
            x_step = inputs[step].t()           # input at the current time step
            context_step = contexts[step].t()   # one-hot context at the current time step
            # vertical direction: iterate over all layers
            for layer in range(self.num_layers):
                layer_name = 'cell{}'.format(layer)
                if step == 0:
                    batch_size = inputs[step].size(0)
                    h, c = getattr(self, layer_name).init_state(batch_size=batch_size,
                                                                hidden_size=self.hidden_size)
                    internal_state.append((h, c))
                # forward propagation through this layer
                (h, c) = internal_state[layer]
                x_step, c_new = getattr(self, layer_name)(x_step, h, c, context_step)
                internal_state[layer] = (x_step, c_new)
            # store the top layer's hidden state for every time step
            outputs.append(x_step)
        # return all top-layer hidden states, plus the hidden state and internal
        # memory of the topmost layer at the last time step
        return outputs, (x_step, c_new)


class MyModel(nn.Module):
    """Classifier model: a Contextual LSTM followed by a linear layer"""

    def __init__(self, num_steps, num_layers, input_size, hidden_size, contextual_type, output_size, bias=True):
        super(MyModel, self).__init__()

        self.num_steps = num_steps
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.contextual_type = contextual_type
        self.output_size = output_size
        self.bias = bias

        self.contextual_lstm = ContextualLSTM(self.num_steps, self.num_layers, self.input_size,
                                              self.hidden_size, self.contextual_type, self.bias)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, inputs, contexts):
        output, (h_final, c_final) = self.contextual_lstm(inputs, contexts)
        # transpose the final hidden state back to (batch_size, hidden_size)
        result = torch.sigmoid(self.linear(h_final.t()))
        return result


if __name__ == '__main__':

    my_model = MyModel(num_steps=5, num_layers=1, input_size=10, hidden_size=20,
                       contextual_type=4, output_size=1, bias=True)
    print(my_model)
    for name, parameter in my_model.named_parameters():
        print(name, parameter)

    # inputs: 5 time steps, a batch of 200 sequences, 10 features each
    inputs = torch.rand(5, 200, 10)

    # a random one-hot context vector for every sequence at every time step
    context = [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]
    lines = []
    for i in range(5):
        line = []
        for j in range(200):
            item = random.choice(context)
            line.append(item)
        lines.append(line)
    content = np.array(lines)
    contexts = torch.FloatTensor(content)
    print(contexts.shape)

    # random binary labels
    label = []
    for i in range(200):
        label.append(random.choice([0, 1]))
    label = np.array(label, dtype=np.int64)
    target = torch.FloatTensor(label).unsqueeze(1)

    criterion = nn.BCELoss(reduction='mean')
    optimizer = optim.SGD(my_model.parameters(), lr=0.001, momentum=0.9)

    EPOCH = 50
    for i in range(EPOCH):
        output = my_model(inputs, contexts)
        loss = criterion(output, target)
        print('epoch', i + 1, ':', loss.item())
        # clear accumulated gradients before backpropagation, so each epoch's
        # update uses only the current batch's gradient
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # for name, parameter in my_model.named_parameters():
    #     print(name, parameter)
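
    # A minimal sanity check one might run after training (an illustrative
    # addition): do one forward pass without gradient tracking and confirm
    # that the predictions have shape (batch_size, output_size) and lie in
    # (0, 1), as guaranteed by the final sigmoid.
    with torch.no_grad():
        predictions = my_model(inputs, contexts)
    print('prediction shape:', predictions.shape)  # expected: torch.Size([200, 1])
    print('prediction range:', predictions.min().item(), '-', predictions.max().item())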