import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
# Hyper Parameters
TIME_STEP = 10 # rnn time step
INPUT_SIZE = 1 # rnn input size
LR = 0.02 # learning rate
step = np.linspace(0, np.pi * 2, 100, dtype=np.float32)  # float32 so the array converts to a torch FloatTensor
x_np = np.sin(step)
y_np = np.cos(step)
plt.plot(step, y_np, 'r-', label='target (cos)')
plt.plot(step, x_np, 'b-', label='input (sin)')
plt.legend(loc='best')
plt.show()
input_size – the number of features in the input x.
hidden_size – the number of features in the hidden state.
num_layers – the number of stacked RNN layers.
nonlinearity – which non-linearity to use, tanh or relu; the default is tanh.
bias – if False, the layer does not use the bias weights b_ih and b_hh; the default is True.
batch_first – if True, the input tensor is expected to have shape [batch_size, time_step, feature], and the output has the same layout.
dropout – if non-zero, a dropout layer is applied to the outputs of every layer except the last.
bidirectional – if True, the RNN becomes bidirectional; the default is False.
Inputs of the RNN: (input, h_0)
- input (seq_len, batch, input_size): tensor holding the features of the input sequence; it may be a padded variable-length sequence.
- h_0 (num_layers * num_directions, batch, hidden_size): tensor holding the initial hidden state.
Outputs of the RNN: (output, h_n)
- output (seq_len, batch, hidden_size * num_directions): tensor holding the output features of the last RNN layer; if the input was a padded sequence, the output is padded as well.
- h_n (num_layers * num_directions, batch, hidden_size): tensor holding the hidden state at the last time step.
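As a quick sanity check (my own addition, not from the original post), the snippet below instantiates nn.RNN with batch_first=True, as this tutorial does later, so the batch and seq_len dimensions described above are swapped; the demo_* names are throwaway and it reuses the torch / nn imports from the top of the script.
demo_rnn = nn.RNN(input_size=1, hidden_size=32, num_layers=1, batch_first=True)
demo_x = torch.zeros(4, 10, 1)    # (batch, time_step, input_size)
demo_h0 = torch.zeros(1, 4, 32)   # (num_layers * num_directions, batch, hidden_size)
demo_out, demo_hn = demo_rnn(demo_x, demo_h0)
print(demo_out.shape)   # torch.Size([4, 10, 32]) -> (batch, time_step, hidden_size * num_directions)
print(demo_hn.shape)    # torch.Size([1, 4, 32])  -> (num_layers * num_directions, batch, hidden_size)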
class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = nn.RNN(
            input_size=INPUT_SIZE,
            hidden_size=32,      # number of hidden units
            num_layers=1,        # a single RNN layer
            batch_first=True,    # input & output tensors are (batch, time_step, feature)
        )
        self.out = nn.Linear(32, 1)

    def forward(self, x, h_state):
        # x       (batch, time_step, input_size)
        # h_state (n_layers, batch, hidden_size)
        # r_out   (batch, time_step, hidden_size)
        r_out, h_state = self.rnn(x, h_state)
        outs = []
        for time_step in range(r_out.size(1)):    # map each time step's hidden state to one output value
            outs.append(self.out(r_out[:, time_step, :]))
        return torch.stack(outs, dim=1), h_state
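A possible simplification (my own sketch, not from the original post): nn.Linear operates on the last dimension of its input, so the per-time-step loop in forward could be replaced by a single self.out(r_out) call. The quick check below uses a throwaway instance and dummy data to confirm the two forms agree.
_check = RNN()                                        # throwaway instance just for this comparison
_x = torch.zeros(2, TIME_STEP, INPUT_SIZE)            # dummy batch of 2 sequences
_r_out, _ = _check.rnn(_x, None)                      # raw hidden states, shape (2, TIME_STEP, 32)
_loop_out, _ = _check(_x, None)                       # result of the loop + stack in forward
print(torch.allclose(_check.out(_r_out), _loop_out))  # True: one Linear call matches the loop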
rnn = RNN()
print(rnn)
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)
loss_func = nn.MSELoss()
h_state = None   # initial hidden state; passing None lets PyTorch start from zeros
plt.figure(1, figsize=(12, 5))
plt.ion()        # interactive mode so the plot updates inside the training loop
for step in range(100):
    start, end = step * np.pi, (step + 1) * np.pi
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32, endpoint=False)  # endpoint=False gives the half-open interval [start, end)
    x_np = np.sin(steps)
    y_np = np.cos(steps)
    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis])   # shape (batch, time_step, input_size)
    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis])
    prediction, h_state = rnn(x, h_state)                   # rnn output
    # this step is important: repack the hidden state to break the graph connection to the previous iteration
    h_state = h_state.data                                  # (equivalently, h_state.detach())
    loss = loss_func(prediction, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    plt.plot(steps, y_np.flatten(), 'r-')
    plt.plot(steps, prediction.data.numpy().flatten(), 'b-')
    plt.draw(); plt.pause(0.05)
plt.ioff()
plt.show()
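As an optional follow-up (my own sketch, not part of the original post), the trained model can be rolled forward on the next sin segment to check that it still tracks cos; the segment below simply continues from the last training step, and the eval_* names are illustrative.
with torch.no_grad():
    eval_steps = np.linspace(100 * np.pi, 101 * np.pi, TIME_STEP, dtype=np.float32, endpoint=False)
    eval_x = torch.from_numpy(np.sin(eval_steps)[np.newaxis, :, np.newaxis])
    eval_pred, _ = rnn(eval_x, h_state)     # reuse the last hidden state so the phase stays continuous
    print(eval_pred.numpy().flatten())      # should be close to np.cos(eval_steps)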