In PyTorch's LSTM, if you want to use output[:, -1, :] to obtain hidden_states, the hidden-layer output at the last time step, it rests on one precondition:
self.lstm = nn.LSTM(
    input_size, hidden_size, num_layers, batch_first=True
)
As in the snippet above, batch_first=True must be set, otherwise this does not hold!!! With batch_first=False the output shape is (seq_len, batch, hidden_size), so output[:, -1, :] would select the last sample in the batch rather than the last time step.
output, _ = self.lstm(x)
h_n = output[:, -1, :]
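A quick self-contained sanity check (a sketch; the sizes here are arbitrary) confirms that with batch_first=True, output[:, -1, :] is exactly the top layer's hidden state at the last time step, i.e. it matches h_n[-1]:

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=8, hidden_size=16, num_layers=2, batch_first=True)
x = torch.randn(4, 10, 8)  # (batch, seq_len, input_size)
output, (h_n, c_n) = lstm(x)
print(output.shape)  # torch.Size([4, 10, 16]) -> (batch, seq_len, hidden_size)
# the last time step of output equals the top layer's final hidden state
assert torch.allclose(output[:, -1, :], h_n[-1])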
Second, if you want a bi-LSTM that predicts using information from both directions, in PyTorch you pass:
bidirectional=True
The common pattern is: after taking hidden_state = output[:, -1, :], feed it through one or more Linear layers to project into the desired output space. Note the in-features are hidden_size * 2, because the bidirectional forward and backward outputs are concatenated:
self.fc = nn.Linear(hidden_size * 2, num_classes)
real_out = self.fc(output[:, -1, :])
which finally yields the prediction real_out.
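Putting these pieces together, a minimal bi-LSTM classifier could look like the sketch below (BiLSTMClassifier and all sizes are hypothetical, purely for illustration). One caveat: for a bidirectional LSTM, output[:, -1, :] concatenates the forward direction's last step with the backward direction's output at that position, and at the last time step the backward pass has seen only that one input.

import torch
import torch.nn as nn

# hypothetical sizes, purely for illustration
input_size, hidden_size, num_layers, num_classes = 8, 16, 2, 5

class BiLSTMClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        # forward and backward outputs are concatenated, hence hidden_size * 2
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        output, _ = self.lstm(x)           # (batch, seq_len, hidden_size * 2)
        return self.fc(output[:, -1, :])   # class logits

model = BiLSTMClassifier()
logits = model(torch.randn(4, 10, input_size))
print(logits.shape)  # torch.Size([4, 5])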
Beyond that, you can use the nn.LSTMCell class for a more flexible LSTM implementation: in the full script below I build sine curves as the training and test sets, train the LSTM on them, and then use the y values from times 0~t to predict y at time t+1, rolling forward autoregressively up to t+n. A single-step LSTMCell sketch first:
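Unlike nn.LSTM, which consumes a whole sequence in one call, nn.LSTMCell processes one time step at a time and leaves the (h, c) state management to you, which is exactly what makes autoregressive rollout easy. A minimal sketch (the sizes are arbitrary):

import torch
import torch.nn as nn

cell = nn.LSTMCell(input_size=1, hidden_size=64)
x = torch.randn(8, 5, 1)   # (batch, seq_len, feature)
h = torch.zeros(8, 64)     # you manage the hidden/cell state yourself
c = torch.zeros(8, 64)
for input_t in x.split(1, dim=1):             # iterate over the time axis
    h, c = cell(input_t.squeeze(1), (h, c))   # cell expects (batch, input_size)
print(h.shape)  # torch.Size([8, 64]) -- hidden state after the last step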
On top of that, I also explored how the LSTM behaves under different optimizers, discussion welcome!!!
I'm posting my implementation here so we can learn from each other; it follows a tutorial video from YouTube.
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import torch
import datetime
import numpy as np
import os
N = 100   # number of sine curves (samples)
L = 1000  # points per curve
T = 20    # period scale of the sine wave
optimizer_set = ["sgd"]  # options: "sgd", "adam", "LBFGS"
PATH = "save/" + str(optimizer_set[0]) + "_" + str(datetime.datetime.now())[:-7].replace(" ", "_").replace(":",
"_").replace(
"-", "_") + "/"
if not os.path.exists(PATH):
os.mkdir(PATH)
fig_path = os.path.join(PATH, "fig")
os.mkdir(fig_path)
model_path = os.path.join(PATH, "model")
os.mkdir(model_path)
# N sine curves of length L with random integer phase shifts
x = np.empty((N, L), np.float32)
x[:] = np.array(range(L)) + np.random.randint(-4 * T, 4 * T, N).reshape(N, 1)
y = np.sin(x / 1.0 / T).astype(np.float32)
# "sgd", "LBFGS"
# plt.plot(np.arange(len(y[1, :])), y[1, :])
# plt.show()
class LSTMPredictor(nn.Module):
    def __init__(self, n_hidden=64):
        super(LSTMPredictor, self).__init__()
        self.hidden = n_hidden
        self.lstm1 = nn.LSTMCell(1, self.hidden)  # input: scalar x per time step
        self.lstm2 = nn.LSTMCell(self.hidden, self.hidden)
        self.linear = nn.Linear(self.hidden, 1)   # output: scalar y prediction

    def forward(self, x, future=0):
        outputs = []
        n_samples = x.size(0)
        # initial hidden/cell states for both LSTMCell layers
        h_t = torch.zeros(n_samples, self.hidden, dtype=torch.float32)
        c_t = torch.zeros(n_samples, self.hidden, dtype=torch.float32)
        h_t2 = torch.zeros(n_samples, self.hidden, dtype=torch.float32)
        c_t2 = torch.zeros(n_samples, self.hidden, dtype=torch.float32)
        output = torch.zeros(n_samples, 1, dtype=torch.float32)
        # teacher-forced pass over the observed sequence, one step at a time
        for input_t in x.split(1, dim=1):  # input_t: (N, 1)
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)  # map the hidden state to the prediction
            outputs.append(output)
        # autoregressive rollout: feed the previous prediction back in
        for i in range(future):
            h_t, c_t = self.lstm1(output, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs.append(output)
        outputs = torch.cat(outputs, dim=1)
        return outputs
if __name__ == '__main__':
    # hold out the first 3 curves for testing
    test_input = torch.from_numpy(y[:3, :-1])   # previous y values
    test_target = torch.from_numpy(y[:3, 1:])   # next y values
    # train on the remaining curves
    train_input = torch.from_numpy(y[3:, :-1])  # previous y values
    train_target = torch.from_numpy(y[3:, 1:])  # next y values
    model = LSTMPredictor()
    cret = nn.MSELoss()
    if optimizer_set[0] == "adam":
        n_steps = 10000
        opti = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))
    elif optimizer_set[0] == "LBFGS":
        n_steps = 10
        opti = optim.LBFGS(model.parameters(), lr=0.8)  # limited-memory BFGS, works on the whole batch
    elif optimizer_set[0] == "sgd":
        n_steps = 10000
        opti = optim.SGD(model.parameters(), lr=1e-3)
    # training loop; closure() is required by LBFGS and harmless for SGD/Adam
    for i in range(n_steps):
        print("step:{}".format(i))

        def closure():
            opti.zero_grad()
            out = model(train_input)
            loss = cret(out, train_target)
            print("loss:{}".format(loss))
            loss.backward()
            return loss

        opti.step(closure)

        # evaluate on the held-out curves without tracking gradients
        with torch.no_grad():
            future = 1000
            pred = model(test_input, future=future)       # predict observed steps plus `future` new ones
            loss = cret(pred[:, :-future], test_target)   # score only the observed part
            print("test loss:{}".format(loss))
            y_pred = pred.detach().numpy()  # renamed from y to avoid shadowing the dataset

        if i % 100 == 0:
            plt.figure(figsize=(16, 8))
            plt.title("lstm, predictor")
            plt.xlabel("x")
            plt.ylabel("y")
            plt.xticks(fontsize=20)
            plt.yticks(fontsize=20)
            n = train_input.shape[1]  # 999 observed steps

            def draw(y_i, color):
                # solid line: fit on the observed steps; dotted: autoregressive forecast
                plt.plot(np.arange(n), y_i[:n], color, linewidth=2.0)
                plt.plot(np.arange(n, n + future), y_i[n:], color + ":", linewidth=2.0)

            draw(y_pred[0], 'r')
            draw(y_pred[1], 'b')
            draw(y_pred[2], 'g')
            plt.savefig(fig_path + ("/predictor%d.pdf" % i))
            plt.show()
            plt.close()
            torch.save(model.state_dict(), model_path + "/model.pt")
respect!!!