A recurrent neural network (RNN) is a neural network well suited to processing sequence data; it is widely used for language modeling, text generation, machine translation, and similar tasks.
Keras provides three built-in recurrent layers: SimpleRNN, LSTM, and GRU.
import tensorflow as tf
tf.keras.layers.SimpleRNN
Main SimpleRNN parameters: units (dimensionality of the hidden state and output), activation (defaults to 'tanh'), return_sequences (return the output at every time step instead of only the last one), and return_state (additionally return the final state). For example:
model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(64),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
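As a quick sanity check, here is a minimal sketch that runs the model above on hypothetical dummy data (4 samples, 10 time steps, 8 features) to show the shapes involved; SimpleRNN returns only the last time step by default, so the final output is [batch_size, 1].
import numpy as np
x = np.random.random((4, 10, 8)).astype('float32')  # dummy input: [batch_size, time_steps, feature_dim]
print(model(x).shape)  # (4, 1): SimpleRNN(64) emits its last step, Dense maps it to one unit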
tf.keras.layers.LSTM
Main parameters: units (hidden size), return_sequences (return the full output sequence or only the last step), and return_state (additionally return the final hidden state and cell state). The four combinations of return_sequences and return_state behave as follows:
import numpy as np
data = np.array([0.1, 0.2, 0.3]).reshape((1, 3, 1))  # [batch_size, time_steps, feature_dim]
(1) return_sequences=False, return_state=False
input_data = tf.keras.Input(shape=(3, 1))
output = tf.keras.layers.LSTM(1, return_sequences=False, return_state=False,
                              kernel_initializer=tf.ones_initializer(),
                              recurrent_initializer=tf.ones_initializer())(input_data)
model = tf.keras.Model(inputs=input_data, outputs=output)
result = model.predict(data)
print(result)  # [batch_size, units]: only the last output of the sequence
print(result.shape)
[[0.20281446]]
(1, 1)
model1 = tf.keras.Sequential([
    tf.keras.layers.LSTM(1, return_sequences=False, return_state=False,
                         kernel_initializer=tf.ones_initializer(),
                         recurrent_initializer=tf.ones_initializer()),
])
result = model1.predict(data)  # [batch_size, units]: only the last output of the sequence
print(result)
print(result.shape)
[[0.20281446]]
(1, 1)
(2) return_sequences=True, return_state=False
input_data = tf.keras.Input(shape=(3, 1))
output = tf.keras.layers.LSTM(1, return_sequences=True, return_state=False,
                              kernel_initializer=tf.ones_initializer(),
                              recurrent_initializer=tf.ones_initializer())(input_data)
model = tf.keras.Model(inputs=input_data, outputs=output)
result = model.predict(data)
print(result)  # [batch_size, time_steps, units]: the output at every time step
print(result.shape)
[[[0.02744377]
[0.09097716]
[0.20281446]]]
(1, 3, 1)
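The numbers above can be checked by hand. The following minimal NumPy sketch re-implements the LSTM forward pass under the settings used here: all kernel and recurrent weights are 1 (from the ones initializers), biases are 0, except the forget-gate bias, which Keras sets to 1 by default (unit_forget_bias=True).
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

h, c = 0.0, 0.0              # hidden state and cell state start at zero
for x in [0.1, 0.2, 0.3]:    # the same sequence as `data`
    z = x + h                # kernel and recurrent kernel are both 1, bias 0
    i = sigmoid(z)           # input gate
    f = sigmoid(z + 1.0)     # forget gate; the +1 comes from unit_forget_bias
    o = sigmoid(z)           # output gate
    c = f * c + i * np.tanh(z)  # new cell state
    h = o * np.tanh(c)          # new hidden state
    print(h)  # 0.0274..., 0.0909..., 0.2028..., matching the output above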
(3) return_sequences=False, return_state=True
input_data = tf.keras.Input(shape=(3, 1))
output, state_h, state_c = tf.keras.layers.LSTM(1, return_sequences=False, return_state=True,
                                                kernel_initializer=tf.ones_initializer(),
                                                recurrent_initializer=tf.ones_initializer())(input_data)
model2 = tf.keras.Model(inputs=input_data, outputs=output)
result = model2.predict(data)
print(result)  # [batch_size, units]: only the last output of the sequence
print(result.shape)
[[0.20281446]]
(1, 1)
The following example raises an error: a layer configured with return_state=True returns multiple tensors, which Sequential cannot handle.
# model3 = tf.keras.Sequential([
#     tf.keras.layers.LSTM(1, return_sequences=False, return_state=True,
#                          kernel_initializer=tf.ones_initializer(),
#                          recurrent_initializer=tf.ones_initializer()),
# ])
# model3.predict(data)
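To actually inspect the states, build a functional model whose outputs include them. The sketch below reuses the tensors defined above; it shows that state_h is identical to output (the hidden state at the last step), while state_c is the final cell state.
model_states = tf.keras.Model(inputs=input_data, outputs=[output, state_h, state_c])
out, h, c = model_states.predict(data)
print(out)  # the last output of the sequence
print(h)    # state_h: identical to out
print(c)    # state_c: the final cell state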
(4) return_sequences=True, return_state=True
input_data = tf.keras.Input(shape=(3, 1))
output, state_h, state_c = tf.keras.layers.LSTM(1, return_sequences=True, return_state=True,
                                                kernel_initializer=tf.ones_initializer(),
                                                recurrent_initializer=tf.ones_initializer())(input_data)
model2 = tf.keras.Model(inputs=input_data, outputs=output)
result1 = model2.predict(data)
print(result1)  # [batch_size, time_steps, units]: the output at every time step
print(result1.shape)
[[[0.02744377]
[0.09097716]
[0.20281446]]]
(1, 3, 1)
The following example raises an error for the same reason: Sequential cannot handle a layer that returns multiple tensors.
# model3 = tf.keras.Sequential([
#     tf.keras.layers.LSTM(1, return_sequences=True, return_state=True,
#                          kernel_initializer=tf.ones_initializer(),
#                          recurrent_initializer=tf.ones_initializer()),
# ])
# model3.predict(data)
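Again the states can be exposed through a functional model. The sketch below verifies the relationship in this case: the last time step of the returned sequence equals state_h.
model_full = tf.keras.Model(inputs=input_data, outputs=[output, state_h, state_c])
seq, h, c = model_full.predict(data)
print(seq[:, -1, :])  # the last step of the full sequence ...
print(h)              # ... is the same as state_h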
Stacked LSTM: when stacking recurrent layers, every layer except the last must set return_sequences=True so that the next layer receives a 3-D sequence input.
input_data = tf.keras.Input(shape=(3, 1))
output1 = tf.keras.layers.LSTM(1, return_sequences=True, return_state=False,
                               kernel_initializer=tf.ones_initializer(),
                               recurrent_initializer=tf.ones_initializer())(input_data)
output2 = tf.keras.layers.LSTM(1, return_sequences=False, return_state=False,
                               kernel_initializer=tf.ones_initializer(),
                               recurrent_initializer=tf.ones_initializer())(output1)
model = tf.keras.Model(inputs=input_data, outputs=output2)
result = model.predict(data)
print(result)  # [batch_size, units]: the last output of the second layer
print(result.shape)
[[0.09769259]]
(1, 1)
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(1, return_sequences=True, return_state=False,
                         kernel_initializer=tf.ones_initializer(),
                         recurrent_initializer=tf.ones_initializer()),
    tf.keras.layers.LSTM(1, return_sequences=True, return_state=False,
                         kernel_initializer=tf.ones_initializer(),
                         recurrent_initializer=tf.ones_initializer()),
])
model.predict(data)  # both layers return sequences, so the output is [batch_size, time_steps, units]
array([[[0.00704829],
[0.03230806],
[0.09769259]]], dtype=float32)
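For contrast, if the first layer returned only its last step, the second layer would receive a 2-D tensor and fail with a shape error; a sketch:
bad = tf.keras.Sequential([
    tf.keras.layers.LSTM(1, return_sequences=False),
    tf.keras.layers.LSTM(1),
])
try:
    bad.predict(data)
except ValueError as e:
    print(e)  # the second LSTM expects ndim=3 but receives ndim=2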
tf.keras.layers.GRU
The GRU merges the forget gate and the input gate into a single update gate and mixes the cell state and the hidden state, so it returns one state fewer than an LSTM. The reset gate decides how much of the previous memory to combine with the new input, while the update gate decides how much of the previous memory to keep.
input_data = tf.keras.Input(shape=(3, 1))
output, state = tf.keras.layers.GRU(1, return_sequences=True, return_state=True,
                                    kernel_initializer=tf.ones_initializer(),
                                    recurrent_initializer=tf.ones_initializer())(input_data)  # note: the single state mixes the cell state and hidden state
model2 = tf.keras.Model(inputs=input_data, outputs=output)
result1 = model2.predict(data)
print(result1)  # [batch_size, time_steps, units]: the output at every time step
print(result1.shape)
[[[0.04734437]
[0.12427121]
[0.21689339]]]
(1, 3, 1)
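The parameter savings can be confirmed directly. The sketch below compares count_params() for LSTM and GRU layers of the same size (64 units, 100 input features; the exact counts assume TF2 defaults, including reset_after=True for GRU): four gate blocks for LSTM versus three for GRU.
lstm = tf.keras.layers.LSTM(64)
gru = tf.keras.layers.GRU(64)
_ = lstm(tf.zeros((1, 10, 100)))  # call each layer once so its weights are built
_ = gru(tf.zeros((1, 10, 100)))
print(lstm.count_params())  # 42240 = 4 * (100*64 + 64*64 + 64)
print(gru.count_params())   # 31872 = 3 * (100*64 + 64*64 + 2*64)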
model3 = tf.keras.Sequential([
    tf.keras.layers.GRU(1, return_sequences=True, return_state=False),
    tf.keras.layers.GRU(1, return_sequences=True, return_state=False),
])
model3.predict(data)  # weights are randomly initialized here, so the values differ between runs
array([[[-0.01829882],
[-0.06010304],
[-0.12294452]]], dtype=float32)
tf.keras.layers.Bidirectional
Wraps a recurrent layer so that the sequence is processed in both the forward and backward directions.
Main parameters: layer (the RNN layer to wrap), merge_mode (how the forward and backward outputs are combined: 'concat' by default, or 'sum', 'mul', 'ave', None), and backward_layer (an optional separate layer instance for the backward pass).
model = tf.keras.Sequential([
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(2, return_sequences=True,
                                                       kernel_initializer=tf.ones_initializer(),
                                                       recurrent_initializer=tf.ones_initializer()),
                                  input_shape=(3, 1)),
    tf.keras.layers.Dense(2)
])
result1 = model.predict(data)
print(result1)
print(result1.shape)
[[[ 0.05966149 -0.00104126]
[-0.11142851 0.05025645]
[-0.42164198 0.15016954]]]
(1, 3, 2)
Note: with return_sequences=True, the layer returns the forward and backward outputs at every time step (the backward outputs are reversed back into forward time order before being merged); with return_sequences=False, the last output of the forward pass is concatenated with the last output of the backward pass.
input_data = tf.keras.Input(shape=(3, 1))
output = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(1, return_sequences=False, return_state=False,
                                                            kernel_initializer=tf.ones_initializer(),
                                                            recurrent_initializer=tf.ones_initializer()))(input_data)
model2 = tf.keras.Model(inputs=input_data, outputs=output)
result1 = model2.predict(data)
print(result1)
print(result1.shape)
[[0.20281446 0.20384209]]
(1, 2)
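The merge_mode argument controls how the two directions are combined: 'concat' (the default), 'sum', 'mul', 'ave', or None (return both outputs separately). A sketch showing that 'sum' keeps the feature dimension unchanged:
bi_sum = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(1, return_sequences=True), merge_mode='sum')
print(bi_sum(tf.constant(data, dtype=tf.float32)).shape)  # (1, 3, 1) rather than (1, 3, 2)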
Multi-layer BiLSTM
input_data = tf.keras.Input(shape=(3, 1))
output1 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(1, return_sequences=True))(input_data)
output2 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(1))(output1)
model = tf.keras.Model(inputs=input_data, outputs=output2)
result = model.predict(data)
print(result)  # [batch_size, 2*units]: the last forward and backward outputs, concatenated
print(result.shape)
[[0.0067467 0.02036347]]
(1, 2)
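To close, here is a typical end-to-end use of these layers: a hedged sketch of a binary text classifier built from an embedding, two stacked BiLSTM layers, and a dense output. The vocabulary size, sequence length, and unit counts are illustrative assumptions, not values from the text above.
vocab_size, max_len = 10000, 100  # assumed hyperparameters, for illustration only
clf = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, 64),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
clf.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
clf.build(input_shape=(None, max_len))
clf.summary()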