PyTorch入门八 || 循环神经网络(基础)

PyTorch入门八 || 循环神经网络(基础)

RNN专门用于处理带序列模式的数据,如天气预测,自然语言处理

预测降雨的例子

给一个气象表,包含(温度|气压|是否下雨)这三个信息字段

我们利用前三天的气象信息来预测今天是否会下雨,因此需要将数据按每四天划分为一组:前三天的记录作为输入(input),第四天的“是否下雨”作为标签。


原理图

PyTorch入门八 || 循环神经网络(基础)_第1张图片

h0 与 x1 作为输入,经过 RNN Cell 得到输出 h1;再将 h1 和 x2 作为输入,经过 RNN Cell 得到输出 h2,如此一直循环下去。h0 可以是 CNN 的输出,这样就将 CNN 与 RNN 接到一起,实现图片转文字;也可以将 h0 设置为与 h1 维度相同的全 0 向量,作为初始输入。

其中 RNN Cell 本质上是一个线性层再接 tanh 激活:$h_t = \tanh(W_{ih}x_t + b_{ih} + W_{hh}h_{t-1} + b_{hh})$;图中的这几个 RNN Cell 都是同一个(共享同一组权重)。

PyTorch入门八 || 循环神经网络(基础)_第2张图片

image-20220207123707301

如何使用RNN Cell
import torch

# Demo: drive a single torch.nn.RNNCell by hand, one time step at a time.
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2

cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

# Random data created as (seq_len, batch_size, input_size); iterating over the
# first dimension yields one (batch_size, input_size) slice per time step.
dataset = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(batch_size, hidden_size)  # initial hidden state h0, all zeros

# The loop variable is named `step_input` rather than `input` so that the
# builtin input() is not shadowed at module level.
for idx, step_input in enumerate(dataset):
    print('='*20,idx,'='*20)
    print('Input size:',step_input.shape)

    # The cell maps (current input, previous hidden) to the next hidden state.
    hidden = cell(step_input, hidden)

    print('outputs size:',hidden.shape)
    print(hidden)

如何使用RNN

PyTorch入门八 || 循环神经网络(基础)_第3张图片

使用 RNN 时,沿序列的循环由模块内部自动完成,不需要自己写 for 循环

import torch

# Demo: torch.nn.RNN processes the whole sequence in a single call.
batch_size, seq_len = 1, 3
input_size, hidden_size = 4, 2
num_layers = 1

cell = torch.nn.RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

# Input is (seq_len, batch_size, input_size); the initial hidden state is an
# all-zero tensor of shape (num_layers, batch_size, hidden_size).
inputs = torch.randn((seq_len, batch_size, input_size))
hidden = torch.zeros((num_layers, batch_size, hidden_size))

# `out` holds the hidden state of every time step, `hidden` the final one.
out, hidden = cell(inputs, hidden)

for caption, value in (
    ('outputs size:', out.shape),
    ('output:', out),
    ('hidden size:', hidden.shape),
    ('hidden:', hidden),
):
    print(caption, value)
多层RNN结构

PyTorch入门八 || 循环神经网络(基础)_第4张图片

同样颜色的RNN Cell 是同一个


例子

PyTorch入门八 || 循环神经网络(基础)_第5张图片

第一步:将字符向量化

PyTorch入门八 || 循环神经网络(基础)_第6张图片

第二步:训练,如何计算loss

PyTorch入门八 || 循环神经网络(基础)_第7张图片

将 RNN 每个时间步的输出看作一个在各类别上的分布,逐步与对应的标签计算交叉熵 loss,做法与普通分类问题一致

code:RNNCell版

import torch

# Configuration of the toy "hello" -> "ohlol" task.
batch_size, input_size, hidden_size = 1, 4, 4

# Vocabulary plus the index-encoded input and target sequences.
idx2char = list('ehlo')
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]

# 4x4 identity table: row i is the one-hot vector of index i.
one_hot_lookup = [[int(row == col) for col in range(4)] for row in range(4)]

x_one_hot = list(map(one_hot_lookup.__getitem__, x_data))

# inputs: (seq_len, batch_size, input_size); labels: (seq_len, 1)
inputs = torch.Tensor(x_one_hot).reshape(-1, batch_size, input_size)
labels = torch.LongTensor(y_data).unsqueeze(1)

class Model(torch.nn.Module):
    """Thin wrapper around a single ``torch.nn.RNNCell``.

    The caller drives the time loop and passes the hidden state in and out
    of ``forward`` at every step.
    """

    def __init__(self,input_size,hidden_size,batch_size):
        """Store the sizes and build the cell.

        ``batch_size`` is kept only so that ``init_hidden`` can build h0.
        """
        super().__init__()
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnncell = torch.nn.RNNCell(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
        )

    def forward(self,input,hidden):
        """Run one time step and return the new hidden state."""
        return self.rnncell(input, hidden)

    def init_hidden(self):
        """Return an all-zero initial hidden state of shape (batch_size, hidden_size)."""
        return torch.zeros((self.batch_size, self.hidden_size))

net = Model(input_size, hidden_size, batch_size)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

if __name__ == '__main__':
    for epoch in range(15):
        optimizer.zero_grad()
        hidden = net.init_hidden()  # fresh h0 at the start of every epoch
        loss = 0
        print('Predicted string:', end='')
        # Walk the sequence step by step, summing the per-step losses so a
        # single backward() covers the whole sequence.
        for step_input, target in zip(inputs, labels):
            hidden = net(step_input, hidden)
            loss = loss + criterion(hidden, target)
            best = hidden.max(dim=1)[1]
            print(idx2char[best.item()], end='')
        loss.backward()
        optimizer.step()
        print(',Epoch [%d/15] loss=%.4f' % (epoch + 1, loss.item()))

code:RNN版

import torch

# Configuration of the toy "hello" -> "ohlol" task for the torch.nn.RNN version.
batch_size, input_size, hidden_size = 1, 4, 4
num_layers, seq_len = 1, 5

# Vocabulary plus the index-encoded input and target sequences.
idx2char = list('ehlo')
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]

# 4x4 identity table: row i is the one-hot vector of index i.
one_hot_lookup = [[1 if col == row else 0 for col in range(4)] for row in range(4)]

x_one_hot = [one_hot_lookup[char_idx] for char_idx in x_data]

# inputs: (seq_len, batch_size, input_size); labels: flat (seq_len,) vector
inputs = torch.Tensor(x_one_hot).reshape(seq_len, batch_size, input_size)
labels = torch.LongTensor(y_data)

class Model(torch.nn.Module):
    """Wrapper around ``torch.nn.RNN`` that flattens the per-step outputs."""

    def __init__(self,input_size,hidden_size,batch_size,num_layers=1):
        """Store the sizes and build the RNN.

        ``batch_size`` is kept only so that ``forward`` can build h0.
        """
        super().__init__()
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnn = torch.nn.RNN(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
            num_layers=num_layers,
        )

    def forward(self,input):
        """Run the whole sequence from a zero initial hidden state.

        Shapes:
            input:  (seq_len, batch, input_size)
            rnn out: (seq_len, batch, hidden_size)
            final hidden (discarded): (num_layers, batch, hidden_size)

        Returns the RNN output flattened to (seq_len * batch, hidden_size).
        """
        h0 = torch.zeros((self.num_layers, self.batch_size, self.hidden_size))
        step_outputs = self.rnn(input, h0)[0]
        return step_outputs.view(-1, self.hidden_size)

net = Model(input_size,hidden_size,batch_size,num_layers)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(),lr=0.05)

if __name__=='__main__':
    # Single source of truth for the epoch count, so that the progress
    # message always reports the real total.
    num_epochs = 50
    for epoch in range(num_epochs):
        # One optimisation step over the whole sequence.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs,labels)
        loss.backward()
        optimizer.step()

        # Decode the highest-scoring class of every time step into characters.
        _,idx = outputs.max(dim=1)
        idx = idx.data.numpy()
        print('Predicted:',''.join([idx2char[x] for x in idx]),end='')

        print(',Epoch [%d/%d] loss=%.4f'%(epoch+1,num_epochs,loss.item()))

改进:

独热向量具有一些缺点,比如密度低,维度高,硬编码。因此引入Embedding的概念

PyTorch入门八 || 循环神经网络(基础)_第8张图片

网络结构

PyTorch入门八 || 循环神经网络(基础)_第9张图片

import torch

# Hyper-parameters of the embedding + 2-layer RNN + linear variant.
batch_size, seq_len = 1, 5
input_size = 4        # passed as the number of rows of the embedding table
embedding_size = 10
hidden_size = 8
num_layers = 2
num_class = 4

idx2char = list('ehlo')
x_data = [[1, 0, 2, 2, 3]]   # nested list, so the tensor is 2-D: (1, 5)
y_data = [3, 1, 2, 3, 2]     # flat list, so the tensor is 1-D: (5,)

# Both tensors hold integer indices (no one-hot encoding here).
inputs = torch.tensor(x_data, dtype=torch.long)
labels = torch.tensor(y_data, dtype=torch.long)

class Model2(torch.nn.Module):
    """Embedding -> multi-layer RNN (batch_first) -> linear classifier.

    All sizes are read from the module-level constants ``input_size``,
    ``embedding_size``, ``hidden_size``, ``num_layers`` and ``num_class``.
    """

    def __init__(self):
        super().__init__()
        self.emb = torch.nn.Embedding(input_size, embedding_size)
        self.rnn = torch.nn.RNN(
            input_size=embedding_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = torch.nn.Linear(hidden_size, num_class)

    def forward(self,x):
        """Map a batch of index sequences to per-step class scores.

        The initial hidden state is all zeros and sized from the first
        dimension of ``x``. The result is flattened to (-1, num_class).
        """
        h0 = torch.zeros((num_layers, x.shape[0], hidden_size))
        embedded = self.emb(x)
        rnn_out, _ = self.rnn(embedded, h0)
        logits = self.fc(rnn_out)
        return logits.view(-1, num_class)

net = Model2()

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

if __name__ == '__main__':
    for epoch in range(15):
        # One optimisation step: clear gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Decode the highest-scoring class of every row into characters.
        best = outputs.max(dim=1)[1].data.numpy()
        decoded = ''.join(idx2char[i] for i in best)
        print('Predicted:', decoded, end='')

        print(',Epoch [%d/15] loss=%.4f' % (epoch + 1, loss.item()))

新增:以正弦曲线作为输入,用 RNN 拟合(预测)余弦曲线

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt

# Hyper-parameters for the sine -> cosine regression demo.
input_size = 1
batch_size = 1
num_layers = 1
hidden_size = 32
# nn.RNN expects the initial hidden state as (num_layers, batch, hidden_size),
# even when the module is built with batch_first=True. The previous
# (batch, num_layers, hidden) order only worked because both sizes are 1.
hidden0 = torch.zeros(num_layers,batch_size,hidden_size)
seq = 10  # number of sample points per training segment

class Net(nn.Module):
    """RNN followed by a two-layer MLP head applied to every time step.

    Sizes are read from the module-level constants ``input_size``,
    ``hidden_size`` and ``num_layers``.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True
        )
        self.linear1 = nn.Linear(hidden_size,16)
        self.linear2 = nn.Linear(16,1)

    def forward(self,x,h_state):
        """Run the sequence through the RNN and the MLP head.

        Args:
            x: input of shape (batch, seq, input_size) (batch_first=True).
            h_state: initial hidden state of shape
                (num_layers, batch, hidden_size).

        Returns:
            A tuple ``(outs, hidden)`` where ``outs`` has shape
            (batch, seq, 1) and ``hidden`` is the hidden state after the last
            time step, shape (num_layers, batch, hidden_size).
        """
        rnn_out,hidden = self.rnn(x,h_state)

        # nn.Linear acts on the last dimension, so the head can be applied to
        # all time steps at once instead of looping over the sequence.
        outs = self.linear2(F.relu(self.linear1(rnn_out)))

        # Return the *updated* hidden state (not the one passed in) so callers
        # can carry it over to the next segment.
        return outs,hidden

model = Net()
print(model)

criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.02)

plt.figure(1, figsize=(12, 5))
plt.ion()  # interactive mode so the curves are redrawn while training

# Start from the zero hidden state, then feed each step's returned state
# into the next step.
h_state = hidden0

for step in range(100):
    # Each step trains on the next half-period [step*pi, (step+1)*pi).
    start,end = step*np.pi,(step+1)*np.pi
    steps = np.linspace(start,end,seq,dtype=np.float32,endpoint=False)
    x_np = np.sin(steps)
    y_np = np.cos(steps)

    # Reshape the 1-D samples to (1, seq, 1).
    x = torch.from_numpy(x_np[np.newaxis,:,np.newaxis])
    y = torch.from_numpy(y_np[np.newaxis,:,np.newaxis])

    prediction,h_state = model(x,h_state)
    # Detach the state so the next step's backward() does not try to
    # backpropagate into this step's (already freed) graph.
    h_state = h_state.detach()

    optimizer.zero_grad()
    loss = criterion(prediction,y)
    loss.backward()
    optimizer.step()

    # Red: target cosine; blue: current prediction.
    plt.plot(steps, y_np.flatten(), 'r-')
    plt.plot(steps, prediction.detach().numpy().flatten(), 'b-')
    plt.draw(); plt.pause(0.05)

plt.ioff()
plt.show()

你可能感兴趣的:(深度学习,pytorch,rnn,深度学习)