Given the input, each RNN cell applies a sigmoid to obtain the hidden-state vector, and the hidden state is then used to predict the output at each time step:

$h^{(t)} = \sigma\big(x^{(t)} W_x + h^{(t-1)} W_h + b_1\big), \qquad \hat{y}^{(t)} = \mathrm{softmax}\big(h^{(t)} U + b_2\big)$

To train the model, we use a cross-entropy loss on every predicted token:

$J = \sum_t \mathrm{CE}\big(y^{(t)}, \hat{y}^{(t)}\big) = -\sum_t \sum_i y_i^{(t)} \log \hat{y}_i^{(t)}$
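As a concrete check of these formulas, here is a minimal numpy sketch of a single forward step and its per-token loss; the dimensions, weights, and gold label are made up for illustration:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

# Toy dimensions: input size D = 3, hidden size H = 2, C = 2 classes.
rng = np.random.RandomState(0)
W_x, W_h, b1 = rng.randn(3, 2), rng.randn(2, 2), np.zeros(2)
U, b2 = rng.randn(2, 2), np.zeros(2)

x_t = np.array([0.4, 0.5, 0.6])   # input at time step t
h_prev = np.zeros(2)              # h^(0) = 0

h_t = sigmoid(x_t @ W_x + h_prev @ W_h + b1)    # hidden state
logits = h_t @ U + b2
y_hat = np.exp(logits) / np.exp(logits).sum()   # softmax prediction
gold = 1                                        # made-up gold label index
loss_t = -np.log(y_hat[gold])                   # cross-entropy at step t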
(a) i. RNN model: $W_h \in \mathbb{R}^{H \times H}$ has $H^2$ parameters, and $W_x \in \mathbb{R}^{D \times H}$ has $DH$ parameters.
Window-based model: $W \in \mathbb{R}^{(2w+1)D \times H}$ has $(2w+1)DH$ parameters.
ii. Time complexity of predicting the labels of a sentence of length T: $O\big((DH + H^2 + HC)\,T\big)$; see the worked count below.
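The counts can be worked out term by term (a sketch in the usual notation: $D = \text{n\_features} \cdot \text{embed\_size}$ is the per-step input dimension, $H$ the hidden size, $C$ the number of classes, and $w$ the window radius of the window-based model):

\begin{align*}
\text{RNN model:}\quad & \underbrace{DH}_{W_x} + \underbrace{H^2}_{W_h} + \underbrace{H}_{b_1} + \underbrace{HC}_{U} + \underbrace{C}_{b_2} \\
\text{Window model:}\quad & \underbrace{(2w+1)DH}_{W} + H + HC + C \\
\text{Runtime (length } T\text{):}\quad & O\big((DH + H^2 + HC)\,T\big)
\end{align*}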
(b) It is hard to optimize F1 directly: F1 is not differentiable, and since it is computed over the whole corpus rather than per example, it is also difficult to batch and parallelize.
(c) Implement the RNN cell in q2_rnn_cell.py and run the provided test:
import numpy as np
import tensorflow as tf

class RNNCell(tf.nn.rnn_cell.RNNCell):
    def __init__(self, input_size, state_size):
        self.input_size = input_size
        self._state_size = state_size

    @property
    def state_size(self):
        return self._state_size

    @property
    def output_size(self):
        return self._state_size

    def __call__(self, inputs, state, scope=None):
        """Updates the state using the previous @state and @inputs:
        h_t = sigmoid(x_t W_x + h_{t-1} W_h + b)."""
        scope = scope or type(self).__name__
        with tf.variable_scope(scope):
            W_x = tf.get_variable('W_x', shape=(self.input_size, self._state_size),
                                  dtype=tf.float32,
                                  initializer=tf.contrib.layers.xavier_initializer())
            W_h = tf.get_variable('W_h', shape=(self._state_size, self._state_size),
                                  dtype=tf.float32,
                                  initializer=tf.contrib.layers.xavier_initializer())
            b = tf.get_variable('b', shape=(self._state_size,), dtype=tf.float32,
                                initializer=tf.contrib.layers.xavier_initializer())
            new_state = tf.nn.sigmoid(tf.matmul(state, W_h) + tf.matmul(inputs, W_x) + b)
        # For this simple cell, the output is the new hidden state itself.
        output = new_state
        return output, new_state
def test_rnn_cell():
    with tf.Graph().as_default():
        with tf.variable_scope("test_rnn_cell"):
            x_placeholder = tf.placeholder(tf.float32, shape=(None, 3))
            h_placeholder = tf.placeholder(tf.float32, shape=(None, 2))

            with tf.variable_scope("rnn"):
                tf.get_variable("W_x", initializer=np.array(np.eye(3, 2), dtype=np.float32))
                tf.get_variable("W_h", initializer=np.array(np.eye(2, 2), dtype=np.float32))
                tf.get_variable("b", initializer=np.array(np.ones(2), dtype=np.float32))

            tf.get_variable_scope().reuse_variables()
            cell = RNNCell(3, 2)
            y_var, ht_var = cell(x_placeholder, h_placeholder, scope="rnn")

            init = tf.global_variables_initializer()
            with tf.Session() as session:
                session.run(init)
                x = np.array([
                    [0.4, 0.5, 0.6],
                    [0.3, -0.2, -0.1]], dtype=np.float32)
                h = np.array([
                    [0.2, 0.5],
                    [-0.3, -0.3]], dtype=np.float32)
                y = np.array([
                    [0.832, 0.881],
                    [0.731, 0.622]], dtype=np.float32)
                ht = y
                y_, ht_ = session.run([y_var, ht_var],
                                      feed_dict={x_placeholder: x, h_placeholder: h})
                print("y_ = " + str(y_))
                print("ht_ = " + str(ht_))
                assert np.allclose(y_, ht_), "output and state should be equal."
                assert np.allclose(ht, ht_, atol=1e-2), "new state vector does not seem to be correct."
(d) Implementing the RNN requires us to unroll the computation over the whole sentence. Unfortunately, sentences can be of arbitrary length, so the RNN would be unrolled a different number of times for different sentences, which makes it impossible to batch the data. The most common workaround is to zero-pad the input: assuming the maximum input length is M, an input of length T is extended with M − T zero vectors and padding labels, together with a boolean mask marking the real tokens:
i. By masking the loss, we zero out the loss (and the gradients) contributed by these extra 0-labels.
ii. Implement pad_sequences and run the validation:
def pad_sequences(data, max_length):
    """Pads (or truncates) every (sentence, labels) pair in @data to
    @max_length, returning (sentence', labels', mask) triples."""
    ret = []
    # Use this zero vector when padding sequences.
    zero_vector = [0] * Config.n_features
    zero_label = 4  # label id used for padding (the null 'O' tag)
    for sentence, labels in data:
        len_sentence = len(sentence)
        add_length = max_length - len_sentence
        if add_length > 0:
            # Pad short sentences with zero vectors and null labels,
            # marking the padded positions False in the mask.
            filled_sentence = sentence + [zero_vector] * add_length
            filled_labels = labels + [zero_label] * add_length
            mask = [True] * len_sentence + [False] * add_length
        else:
            # Truncate sentences that are too long.
            mask = [True] * max_length
            filled_sentence = sentence[:max_length]
            filled_labels = labels[:max_length]
        ret.append((filled_sentence, filled_labels, mask))
    return ret
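A quick sanity check of the expected behavior; the Config stub here is a hypothetical stand-in for the assignment's real config, with one feature per token:

class Config:
    n_features = 1

data = [([[1], [2], [3]], [0, 1, 0])]
(sentence, labels, mask), = pad_sequences(data, max_length=5)
assert sentence == [[1], [2], [3], [0], [0]]     # padded with zero vectors
assert labels == [0, 1, 0, 4, 4]                 # padded with the null label
assert mask == [True, True, True, False, False]  # real tokens vs padding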
(e) Implementation of the RNN model:
class RNNModel(NERModel):
    def add_placeholders(self):
        """Adds the input, labels, mask, and dropout placeholders to self
        as instance variables."""
        self.input_placeholder = tf.placeholder(
            tf.int32, shape=(None, self.max_length, Config.n_features), name='input')
        self.labels_placeholder = tf.placeholder(
            tf.int32, shape=(None, self.max_length), name='labels')
        self.mask_placeholder = tf.placeholder(
            tf.bool, shape=(None, self.max_length), name='mask')
        self.dropout_placeholder = tf.placeholder(
            tf.float32, name='dropout')

    def create_feed_dict(self, inputs_batch, mask_batch, labels_batch=None, dropout=1):
        feed_dict = {
            self.input_placeholder: inputs_batch,
            self.mask_placeholder: mask_batch,
            self.dropout_placeholder: dropout
        }
        # Labels are optional so the same method can be used at predict time.
        if labels_batch is not None:
            feed_dict[self.labels_placeholder] = labels_batch
        return feed_dict

    def add_embedding(self):
        """Adds an embedding layer that maps from input tokens (integers) to
        vectors and then concatenates those vectors:
        (batch, max_length, n_features) ids
          -> (batch, max_length, n_features, embed_size) vectors
          -> (batch, max_length, n_features * embed_size) inputs."""
        embeddings = tf.nn.embedding_lookup(
            tf.Variable(self.pretrained_embeddings),
            self.input_placeholder)
        embeddings = tf.reshape(
            embeddings, [-1, self.max_length, Config.n_features * Config.embed_size])
        return embeddings
    def add_prediction_op(self):
        """Adds the unrolled RNN:
        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2"""
        x = self.add_embedding()
        dropout_rate = self.dropout_placeholder
        preds = []
        if self.config.cell == "rnn":
            cell = RNNCell(Config.n_features * Config.embed_size, Config.hidden_size)
        else:
            raise ValueError("Unsupported cell type: " + self.config.cell)
        with tf.variable_scope('Layer1'):
            U = tf.get_variable('U', (Config.hidden_size, Config.n_classes),
                                initializer=tf.contrib.layers.xavier_initializer())
            b2 = tf.get_variable('b2', (Config.n_classes,),
                                 initializer=tf.constant_initializer(0))
        input_shape = tf.shape(x)
        state = tf.zeros((input_shape[0], Config.hidden_size))  # h_0 = 0
        with tf.variable_scope("RNN"):
            for time_step in range(self.max_length):
                # Create the cell variables on the first step, reuse afterwards.
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                o, state = cell(x[:, time_step, :], state, scope="RNN")
                o_drop = tf.nn.dropout(o, dropout_rate)
                output = tf.matmul(o_drop, U) + b2
                preds.append(output)
        preds = tf.stack(preds, axis=1)
        assert preds.get_shape().as_list() == [None, self.max_length, self.config.n_classes], \
            "predictions are not of the right shape. Expected {}, got {}" \
            .format([None, self.max_length, self.config.n_classes], preds.get_shape().as_list())
        return preds
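    # Note: the unrolled loop in add_prediction_op creates W_x, W_h, and b on
    # the first iteration only; tf.get_variable_scope().reuse_variables()
    # makes every later time step fetch the same variables, so the weights
    # are shared across time. A minimal sketch of this TF1 mechanism, with a
    # hypothetical scope name:
    #
    #     with tf.variable_scope("demo"):
    #         a = tf.get_variable("w", shape=(2, 2))  # created here
    #         tf.get_variable_scope().reuse_variables()
    #         b = tf.get_variable("w", shape=(2, 2))  # same variable as a
    #     assert a is b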
    def add_loss_op(self, preds):
        """Adds ops for the masked cross-entropy loss to the computational graph."""
        # Keep only the logits/labels at real (non-padded) positions.
        masked_logits = tf.boolean_mask(preds, self.mask_placeholder)
        masked_labels = tf.boolean_mask(self.labels_placeholder, self.mask_placeholder)
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=masked_logits,
                                                           labels=masked_labels))
        return loss
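    # To see what the masking buys us, a small numpy sketch with made-up
    # per-token losses for one sentence padded from length 3 to length 5:
    #
    #     token_losses = np.array([0.7, 0.2, 0.9, 1.6, 1.6])
    #     mask = np.array([True, True, True, False, False])
    #     token_losses.mean()        # 1.0 -- padding dominates the loss
    #     token_losses[mask].mean()  # 0.6 -- only real tokens contribute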
    def add_training_op(self, loss):
        train_op = tf.train.AdamOptimizer(Config.lr).minimize(loss)
        return train_op
    def preprocess_sequence_data(self, examples):
        def featurize_windows(data, start, end, window_size=1):
            """Use the input sequences in @data to construct new windowed data points."""
            from util import window_iterator
            ret = []
            for sentence, labels in data:
                sentence_ = []
                for window in window_iterator(sentence, window_size, beg=start, end=end):
                    sentence_.append(sum(window, []))
                ret.append((sentence_, labels))
            return ret

        examples = featurize_windows(examples, self.helper.START, self.helper.END)
        return pad_sequences(examples, self.max_length)
    def consolidate_predictions(self, examples_raw, examples, preds):
        """Batches the predictions into groups of sequence length."""
        assert len(examples_raw) == len(examples)
        assert len(examples_raw) == len(preds)
        ret = []
        for i, (sentence, labels) in enumerate(examples_raw):
            _, _, mask = examples[i]
            # Drop predictions at padded positions.
            labels_ = [l for l, m in zip(preds[i], mask) if m]
            assert len(labels_) == len(labels)
            ret.append([sentence, labels, labels_])
        return ret

    def predict_on_batch(self, sess, inputs_batch, mask_batch):
        feed = self.create_feed_dict(inputs_batch=inputs_batch, mask_batch=mask_batch)
        predictions = sess.run(tf.argmax(self.pred, axis=2), feed_dict=feed)
        return predictions
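    # Caveat: tf.argmax(self.pred, axis=2) above is built inside the call,
    # so each invocation adds a new op to the graph. A common refactor
    # (a sketch; pred_argmax is a hypothetical attribute) builds it once:
    #
    #     # in build(), right after self.pred is created:
    #     self.pred_argmax = tf.argmax(self.pred, axis=2)
    #     # then here:
    #     predictions = sess.run(self.pred_argmax, feed_dict=feed)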
    def train_on_batch(self, sess, inputs_batch, labels_batch, mask_batch):
        feed = self.create_feed_dict(inputs_batch, labels_batch=labels_batch,
                                     mask_batch=mask_batch, dropout=Config.dropout)
        _, loss = sess.run([self.train_op, self.loss], feed_dict=feed)
        return loss

    def __init__(self, helper, config, pretrained_embedding, report=None):
        super(RNNModel, self).__init__(helper, config, report)
        self.max_length = min(Config.max_length, helper.max_length)
        Config.max_length = self.max_length  # fix the shared max length for padding
        self.pretrained_embeddings = pretrained_embedding
        self.input_placeholder = None
        self.labels_placeholder = None
        self.mask_placeholder = None
        self.dropout_placeholder = None
        self.build()
Test the implementation with python q2_rnn.py test2.
(f) Train the model with python q2_rnn.py train: this takes about two hours on a CPU and 10-20 minutes on a GPU.
The model and its output are saved under results/rnn/.
Finally, you can interact with the trained model via python q2_rnn.py shell -m results/rnn/.
(g) Limitations of the RNN model: when predicting a label, the model only sees the current and earlier tokens, so it cannot use information from later in the sentence; and each label is predicted independently of its neighbors, so nothing enforces consistency between adjacent labels.