RNN核心思想:可以考虑输入数据前后文相关特征
import numpy as np

# Load the character -> integer-id dictionary.
# NOTE(review): eval() executes arbitrary code from the file; acceptable only
# because the word map is a trusted local artifact -- json would be safer.
with open(r'model\wordmap', 'r', encoding='utf-8') as word_map_file:
    word2id_dict = eval(word_map_file.read())
# print(word2id_dict)

# --- Text vectorization ------------------------------------------------------
B = 2                        # number of texts (batch size)
n_words = len(word2id_dict)  # vocabulary size (was hard-coded to 5388, which
                             # breaks the matmul below if the real dict differs)
strs1 = '今天天气不好。'
strs2 = '所以我不出门。'
# Map unseen characters to id 0: .get() without a default returns None,
# which would crash the one-hot indexing below.
strs_id1 = [word2id_dict.get(itr, 0) for itr in strs1]
strs_id2 = [word2id_dict.get(itr, 0) for itr in strs2]
print(f'文本1:“{strs1}”, 对应字典中的整数:{strs_id1}')
print(f'文本2:“{strs2}”, 对应字典中的整数:{strs_id2}')

# --- One-hot encoding --------------------------------------------------------
T = max(len(strs1), len(strs2))  # sequence length; shorter text stays zero-padded
C = n_words                      # one-hot width == vocabulary size
strs_vect = np.zeros([B, T, C])
for idx, ids in enumerate(strs_id1):
    strs_vect[0, idx, ids] = 1
for idx, ids in enumerate(strs_id2):
    strs_vect[1, idx, ids] = 1
# print(strs_vect)
print(f'降维前 Size:{strs_vect.shape}')

# --- Embedding (dimensionality reduction) ------------------------------------
# Multiply the one-hot text by a (trainable) matrix [vocab size, 128].
embedding_size = 128
# BUG FIX: shape W by C (the actual one-hot width) rather than a hard-coded
# 5388, so the product vect2d @ W is always well-defined.
W = np.random.normal(0, 0.1, [C, embedding_size])
vect2d = np.reshape(strs_vect, [B * T, C])
out = vect2d @ W
vect = np.reshape(out, [B, T, embedding_size])  # embedding result
print(f'降维后 Size:{vect.shape}')

###############
##### RNN #####
###############
# --- Initialization ---
hidden_size = 64
rnn_w = np.random.random([embedding_size + hidden_size, hidden_size])
rnn_b = np.zeros([hidden_size])
state = np.zeros([B, hidden_size])

# --- Forward pass ---
# Each step concatenates the current input with the previous hidden state:
#   state_t = tanh([x_t, state_{t-1}] @ rnn_w + rnn_b)
outputs = []
for step in range(T):
    x_t = np.concatenate([vect[:, step, :], state], axis=1)
    state = np.tanh(x_t @ rnn_w + rnn_b)
    outputs.append(state)
last_output = outputs[-1]  # summarizes the whole sequence
Tensorflow实现(仅框架,没有传入数据):
import tensorflow as tf

# --- Hyper-parameters ---
batch_size = 32       # B = 32
seq_len = 100         # text length, T = 100
embedding_size = 128  # embedded vector length after dimensionality reduction
hidden_size = 128     # hidden state size
epochs = 100          # number of training iterations

# --- Data statistics ---
n_words = 5388        # vocabulary size
n_class = 10          # number of classes

# --- Raw inputs (TF1 placeholders, fed at session run time) ---
input_ID = tf.placeholder(tf.int32, [batch_size, seq_len])
label_ID = tf.placeholder(tf.int32, [batch_size])

# --- Trainable embedding matrix ---
embedding_w = tf.get_variable('embedding_w', [n_words, embedding_size])
# Embedding lookup: [batch_size, seq_len, embedding_size]
inputs = tf.nn.embedding_lookup(embedding_w, input_ID)

# --- Two stacked basic RNN cells ---
rnn_fn = tf.nn.rnn_cell.BasicRNNCell
rnn_cell = tf.nn.rnn_cell.MultiRNNCell([
    rnn_fn(hidden_size),
    rnn_fn(hidden_size)
])

# Run the RNN over the whole sequence.
outputs, last_state = tf.nn.dynamic_rnn(rnn_cell, inputs, dtype=tf.float32)
# BUG FIX: take the LAST time step -- it has seen the entire sequence.
# The original outputs[:, 0, :] only reflected the first character.
last_out = outputs[:, -1, :]
logits = tf.layers.dense(last_out, n_class, activation=None)
label_onehot = tf.one_hot(label_ID, n_class)
loss = tf.losses.softmax_cross_entropy(label_onehot, logits)
train_step = tf.train.AdamOptimizer().minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
for step in range(epochs):
    # Skeleton only: real feed values are omitted.
    sess.run(train_step, feed_dict={label_ID: ..., input_ID: ...})
其中:$\sigma$ 为 sigmoid 函数,$\tanh$ 为双曲正切函数;图中左上角省略了输入记忆单元 $C_{t-1}$,右上角省略了输出记忆单元 $C_t$,左下角省略了输入状态单元 $h_{t-1}$。
公式如下:
$$h_t = \tanh\left\{ \sigma(\mathrm{concat}[x_t, h_{t-1}]) \times C_{t-1} + \sigma(\mathrm{concat}[x_t, h_{t-1}]) \times \tanh(\mathrm{concat}[x_t, h_{t-1}]) \right\} \times \sigma(\mathrm{concat}[x_t, h_{t-1}])$$
$$c_t = \sigma(\mathrm{concat}[x_t, h_{t-1}]) \times C_{t-1} + \sigma(\mathrm{concat}[x_t, h_{t-1}]) \times \tanh(\mathrm{concat}[x_t, h_{t-1}])$$
其中:
GRU为简化版本的LSTM,理论上速度会有显著提升,具体算法:GRU
import tensorflow as tf

# --- Hyper-parameters ---
batch_size = 16
seq_len = 100   # 100 characters per sample
emb_size = 128  # character embedding size
n_layer = 2     # stacked layers per direction
n_hidden = 128

# --- Data statistics ---
n_words = 5388  # vocabulary size
n_class = 10    # number of label classes

# Inputs: length-100 sequences of character ids (int).
# Labels: one class per time step (sequence labelling).
inputs = tf.placeholder(tf.int32, [batch_size, seq_len])
labels = tf.placeholder(tf.int32, [batch_size, seq_len])
mask = tf.placeholder(tf.float32, [batch_size, seq_len])

# --- Embedding (trainable) ---
emb_w = tf.get_variable("emb_w", [n_words, emb_size])
inputs_emb = tf.nn.embedding_lookup(emb_w, inputs)
# inputs_emb feeds the network: [batch_size(B), seq_len(T), emb_size(C)]

# --- Multi-layer cells, one stack per direction ---
# cell_fn = tf.nn.rnn_cell.BasicRNNCell  # basic RNN
cell_fn = tf.nn.rnn_cell.LSTMCell        # LSTM
cell_fw = tf.nn.rnn_cell.MultiRNNCell(
    [cell_fn(n_hidden) for itr in range(n_layer)])
cell_bw = tf.nn.rnn_cell.MultiRNNCell(
    [cell_fn(n_hidden) for itr in range(n_layer)])

# Forward direction.
outputs, last_state = tf.nn.dynamic_rnn(cell_fw, inputs_emb, dtype=tf.float32)
# Backward direction: reverse on the time axis, run, reverse back so both
# directions are time-aligned.  (Typo fix: "intputs_bw" -> "inputs_bw";
# also keep a separate last_state_bw instead of clobbering last_state.)
inputs_bw = tf.reverse(inputs_emb, 1)
outputs_bw, last_state_bw = tf.nn.dynamic_rnn(cell_bw, inputs_bw, dtype=tf.float32)
outputs_bw = tf.reverse(outputs_bw, 1)
# # Alternatively tf.nn.bidirectional_dynamic_rnn() handles the reversing
# # internally; seqlen supports batches of different sequence lengths:
# # (fw_output, bw_output), state = tf.nn.bidirectional_dynamic_rnn(
# #     cell_fw, cell_bw, inputs_emb, seqlen, dtype=tf.float32)

# BUG FIX: actually use the backward outputs -- the concat was commented out,
# so the backward RNN was built and run for nothing.
outputs = tf.concat([outputs, outputs_bw], 2)  # [batch_size, seq_len, 2*n_hidden]

# BUG FIX: project to n_class (10) logits per time step; the original
# hard-coded 4, contradicting n_class above.
logits = tf.layers.dense(outputs, n_class)

# --- Optimization ---
loss = tf.contrib.seq2seq.sequence_loss(
    logits,  # network output [B, T, n_class]
    labels,  # target class ids [B, T]
    mask     # 0/1 weights: zeroes out padded positions so they add no loss
)
step = tf.train.AdamOptimizer().minimize(loss)