Learning notes on building an LSTM model, and on constructing one with LSTMCell

In an LSTM model, if you want to use output[:, -1, :] to obtain the output of the top hidden layer at the last time step, the prerequisite is:

self.lstm = nn.LSTM(
            input_size, hidden_size, num_layers, batch_first=True
        )

As shown above, you must set batch_first=True, otherwise this indexing is wrong: with batch_first=False the output shape is (seq_len, batch, hidden_size), so the last time step would be output[-1, :, :] instead.

output, _ = self.lstm(x)  # output: (batch, seq_len, hidden_size) when batch_first=True
h_n = output[:, -1, :]    # hidden state at the last time step
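
As a quick sanity check (a small sketch I am adding here, with made-up dimensions), for a unidirectional LSTM with batch_first=True, the last time step of output matches the final hidden state of the top layer, h_n[-1]:

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=8, hidden_size=16, num_layers=2, batch_first=True)
x = torch.randn(4, 10, 8)                         # (batch, seq_len, input_size)
output, (h_n, c_n) = lstm(x)                      # output: (batch, seq_len, hidden_size)
print(torch.allclose(output[:, -1, :], h_n[-1]))  # True: last time step == final hidden state of the top layer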

Next, if you want to make predictions with a bi-LSTM, then when constructing the nn.LSTM in PyTorch, pass:

bidirectional=True

A common pattern is: after obtaining hidden_state = output[:, -1, :], feed it through one or more linear layers to map it into the desired output space. Note that with bidirectional=True the feature dimension of output is hidden_size * 2:

self.fc = nn.Linear(hidden_size * 2, num_classes)

real_out = self.fc(out[:, -1, :])  # out is the LSTM output; its last time step has shape (batch, hidden_size * 2)

This finally gives the prediction real_out.
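
Putting the pieces above together, here is a minimal sketch of such a bi-LSTM classifier (the class name BiLSTMClassifier and the concrete dimensions are only illustrative, not from the original post):

import torch
import torch.nn as nn

class BiLSTMClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size, hidden_size, num_layers,
            batch_first=True, bidirectional=True,
        )
        # bidirectional=True doubles the feature dimension of the LSTM output
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        out, _ = self.lstm(x)          # out: (batch, seq_len, hidden_size * 2)
        return self.fc(out[:, -1, :])  # use the last time step as the sequence summary

model = BiLSTMClassifier(input_size=8, hidden_size=32, num_layers=2, num_classes=5)
real_out = model(torch.randn(4, 10, 8))  # shape (4, 5)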


Beyond that, you can also use the LSTMCell class to implement an LSTM more flexibly. Here I build sine curves as the training and test sets, train the LSTM model on them, and then use the y values from time 0~t to predict y at time t+1, continuing autoregressively up to time t+n.
I also explored how the LSTM trains under different optimizers; feedback and discussion are welcome!

I am posting my implementation here so we can learn from each other; it follows a tutorial video from YouTube:

import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import torch
import datetime
import numpy as np
import os

N = 100  # number of sine-curve samples
L = 1000  # length of each sample (number of time steps)
T = 20  # controls the period of the sine wave (y = sin(x / T))


optimizer_set = ["sgd"]  # choose one of: "sgd", "adam", "LBFGS"

PATH = "save/" + str(optimizer_set[0]) + "_" + str(datetime.datetime.now())[:-7].replace(" ", "_").replace(":",
                                                                                                           "_").replace(
    "-", "_") + "/"
if not os.path.exists(PATH):
    os.mkdir(PATH)

fig_path = os.path.join(PATH, "fig")      # where the prediction plots are saved
os.mkdir(fig_path)
model_path = os.path.join(PATH, "model")  # where the trained weights are saved
os.mkdir(model_path)

# each row is the time index 0..L-1 shifted by a random phase, so every sample is a shifted sine wave
x = np.empty((N, L), np.float32)
x[:] = np.array(range(L)) + np.random.randint(-4 * T, 4 * T, N).reshape(N, 1)
y = np.sin(x / 1.0 / T).astype(np.float32)  # shape (N, L)


# "sgd", "LBFGS"


# quick sanity check of the data (uncomment to plot one sample):
# plt.plot(np.arange(len(y[1, :])), y[1, :])
# plt.show()

class LSTMPredictor(nn.Module):
    def __init__(self, n_hidden=64):
        super(LSTMPredictor, self).__init__()

        self.hidden = n_hidden
        self.lstm1 = nn.LSTMCell(1, self.hidden)            # input: one scalar y value per time step
        self.lstm2 = nn.LSTMCell(self.hidden, self.hidden)  # second stacked LSTM layer
        self.linear = nn.Linear(self.hidden, 1)             # output: the predicted next y value

    def forward(self, x, future=0):
        # x: (n_samples, seq_len). Predict the next y value at every time step,
        # then optionally roll the model forward `future` extra steps autoregressively.
        outputs = []
        n_samples = x.size(0)
        h_t = torch.zeros(n_samples, self.hidden, dtype=torch.float32)
        c_t = torch.zeros(n_samples, self.hidden, dtype=torch.float32)

        h_t2 = torch.zeros(n_samples, self.hidden, dtype=torch.float32)
        c_t2 = torch.zeros(n_samples, self.hidden, dtype=torch.float32)

        output = torch.zeros(n_samples, 1, dtype=torch.float32)
        for input_t in x.split(1, dim=1):
            # input_t: (n_samples, 1)
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)  # map the hidden state to the one-step-ahead prediction
            outputs.append(output)

        for i in range(future):
            # feed the previous prediction back in as the next input
            h_t, c_t = self.lstm1(output, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs.append(output)

        outputs = torch.cat(outputs, dim=1)  # (n_samples, seq_len + future)
        return outputs


if __name__ == '__main__':
    # take the first 3 as test
    test_input = torch.from_numpy(y[:3, :-1])  # previous y
    test_target = torch.from_numpy(y[:3, 1:])  # next y
    # take the 4:~ as the input to train
    train_input = torch.from_numpy(y[3:, :-1])  # previous y
    train_target = torch.from_numpy(y[3:, 1:])  # next y

    model = LSTMPredictor()
    criterion = nn.MSELoss()

    if optimizer_set[0] == "adam":
        n_steps = 10000
        opti = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))
    elif optimizer_set[0] == "LBFGS":
        n_steps = 10
        opti = optim.LBFGS(model.parameters(), lr=0.8)  # limitied memory, work on whole data
    elif optimizer_set[0] == "sgd":
        n_steps = 10000
        opti = optim.SGD(model.parameters(), lr=1e-3)

    # training process:
    for i in range(n_steps):
        print("step:{}".format(i))


        def closure():
            # LBFGS requires a closure that re-evaluates the loss; SGD and Adam simply call it once per step
            opti.zero_grad()
            out = model(train_input)
            loss = criterion(out, train_target)
            print("loss:{}".format(loss))
            loss.backward()
            return loss


        opti.step(closure)
        # evaluate the trained model on the test samples, without tracking gradients
        with torch.no_grad():
            future = 1000
            pred = model(test_input, future=future)  # predict the known steps plus 1000 future steps
            loss = criterion(pred[:, :-future], test_target)  # score only the steps we have targets for
            print("loss:{}".format(loss))
            pred_np = pred.detach().numpy()  # new name so the dataset array y is not overwritten


        if (i % 100 == 0):
            plt.figure(figsize=(16, 8))
            plt.title("lstm, predictor")
            plt.xlabel("x")
            plt.ylabel("y")

            plt.xticks(fontsize=20)
            plt.yticks(fontsize=20)
            n = train_input.shape[1]  # number of known time steps (L - 1 = 999)


            def draw(y_i, color):
                # solid line: predictions over the known steps; dotted line: the extrapolated future steps
                plt.plot(np.arange(n), y_i[:n], color, linewidth=2.0)
                plt.plot(np.arange(n, n + future), y_i[n:], color + ":", linewidth=2.0)


            draw(pred_np[0], 'r')
            draw(pred_np[1], 'b')
            draw(pred_np[2], 'g')

            plt.savefig(fig_path + ("/predictor%d.pdf" % i))
            plt.show()
            plt.close()

    torch.save(model.state_dict(), model_path + "/model.pt")

respect!!!
