# Scratch experiment: run one SimpleRNNCell step by hand.
# Random input tensor of shape [4, 80, 100].
x = tf.random.normal([4, 80, 100])
# Take index 0 along axis 1 -> shape [4, 100].
xt0 = x[:, 0, :]
cell = tf.keras.layers.SimpleRNNCell(64)
# One step starting from an all-zero [4, 64] state; the call returns the step
# output and a list holding the new state.
out, xt1 = cell(xt0, [tf.zeros([4, 64])])
# The bare expressions below have no effect when run as a script; presumably
# they were evaluated in an interactive session for display.
out.shape, xt1[0].shape
# Object identities of the output and of the returned state, for comparison.
id(out), id(xt1[0])
cell.trainable_variables
# Scratch experiment: chain two SimpleRNNCell instances for a single step.
# Random input tensor of shape [4, 80, 100].
x = tf.random.normal([4, 80, 100])
# Take index 0 along axis 1 -> shape [4, 100].
xt0 = x[:, 0, :]
cell = tf.keras.layers.SimpleRNNCell(64)
cell2 = tf.keras.layers.SimpleRNNCell(64)
# Each cell gets its own all-zero [4, 64] initial state, wrapped in a list.
state0 = [tf.zeros([4, 64])]
state1 = [tf.zeros([4, 64])]
# The output of the first cell is the input of the second one.
out0, state0 = cell(xt0, state0)
out1, state1 = cell2(out0, state1)
# Bare expression: no effect in a script; presumably evaluated for display in
# an interactive session.
out1.shape, state1[0].shape
import os

# Set before TensorFlow is imported: the native logger picks this variable up
# when it first emits a message, which normally happens during the import, so
# assigning it afterwards is too late to silence the start-up log lines.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
import numpy as np

assert tf.__version__.startswith('2.')

# This script requires at least one GPU and enables memory growth on the first.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0
tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Fixed seeds for TensorFlow and NumPy.
tf.random.set_seed(22)
np.random.seed(22)

batchsz = 128
# the most frequent words (passed to load_data as num_words)
total_words = 10000
# every review is padded or truncated to this many tokens
max_review_len = 80
# width of each word embedding vector
embedding_len = 100

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
# x_train: [b, 80]
# x_test: [b, 80]
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

# drop_remainder=True keeps every batch at exactly batchsz samples.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.shuffle(1000).batch(batchsz, drop_remainder=True)
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_data = test_data.batch(batchsz, drop_remainder=True)

print('x_train shape', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
class MyRNN(tf.keras.Model):
    """Sentiment classifier built from two hand-unrolled SimpleRNNCell layers.

    Data flow: token ids [b, 80] -> embedding [b, 80, 100] -> two stacked
    recurrent cells stepped over the time axis -> last output [b, units]
    -> Dense(1) -> sigmoid probability [b, 1].
    """

    def __init__(self, units):
        """Create the embedding, the two recurrent cells and the output layer.

        :param units: hidden-state width of each recurrent cell.
        """
        super(MyRNN, self).__init__()
        # Remembered so call() can build zero initial states that match the
        # batch size of whatever input it receives, instead of a fixed one.
        self.units = units
        # transform text to embedding representation
        # [b, 80] => [b, 80, 100]
        self.embedding = tf.keras.layers.Embedding(total_words, embedding_len,
                                                   input_length=max_review_len)
        # [b, 80, 100], h_dim: units
        # two stacked SimpleRNN cells
        self.rnn_cell0 = tf.keras.layers.SimpleRNNCell(units, dropout=0.5)
        self.rnn_cell1 = tf.keras.layers.SimpleRNNCell(units, dropout=0.5)
        # fc, [b, units] => [b, 1]
        self.out_layer = tf.keras.layers.Dense(1)

    def call(self, inputs, training=None, mask=None):
        """Return the probability that each review is positive.

        net(x) / net(x, training=True): train mode
        net(x, training=False): test mode

        :param inputs: integer token ids, [b, 80]
        :param training: forwarded to both recurrent cells.
        :param mask: unused.
        :return: probabilities, [b, 1]
        """
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)
        # Zero initial states sized from the actual batch, so the model is not
        # tied to the training batch size.
        batch = tf.shape(x)[0]
        state0 = [tf.zeros([batch, self.units], dtype=x.dtype)]
        state1 = [tf.zeros([batch, self.units], dtype=x.dtype)]
        # run cell compute
        # [b, 80, 100] => [b, units]
        for word in tf.unstack(x, axis=1):  # word: [b, 100]
            # out0: [b, units]
            out0, state0 = self.rnn_cell0(word, state0, training=training)
            # out1: [b, units]
            out1, state1 = self.rnn_cell1(out0, state1, training=training)
        # Only the output of the last time step is classified.
        # out: [b, units] => [b, 1]
        x = self.out_layer(out1)
        # p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
def main():
    """Build MyRNN with 64 units, train it for 4 epochs and evaluate it.

    Validation on test_data runs after every epoch (validation_freq=1), and a
    final evaluation on test_data follows the training run.
    """
    hidden_units, num_epochs = 64, 4

    net = MyRNN(hidden_units)
    net.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
        experimental_run_tf_function=False,
    )
    net.fit(train_data, epochs=num_epochs, validation_data=test_data, validation_freq=1)
    net.evaluate(test_data)


if __name__ == '__main__':
    main()
import os

# Set before TensorFlow is imported: the native logger picks this variable up
# when it first emits a message, which normally happens during the import, so
# assigning it afterwards is too late to silence the start-up log lines.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import time
import tensorflow as tf
import numpy as np

assert tf.__version__.startswith('2.')

# This script requires at least one GPU and enables memory growth on the first.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0
tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Fixed seeds for TensorFlow and NumPy.
tf.random.set_seed(22)
np.random.seed(22)

batchsz = 128
# the most frequent words (passed to load_data as num_words)
total_words = 10000
# every review is padded or truncated to this many tokens
max_review_len = 80
# width of each word embedding vector
embedding_len = 100

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
# x_train: [b, 80]
# x_test: [b, 80]
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

# drop_remainder=True keeps every batch at exactly batchsz samples.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.shuffle(1000).batch(batchsz, drop_remainder=True)
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_data = test_data.batch(batchsz, drop_remainder=True)

print('x_train shape', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
class MyRNN(tf.keras.Model):
    """Sentiment classifier built from two stacked SimpleRNN layers.

    Data flow: token ids [b, 80] -> embedding [b, 80, 100] -> two SimpleRNN
    layers -> [b, units] -> Dense(1) -> sigmoid probability [b, 1].
    """

    def __init__(self, units):
        """Create the embedding, the recurrent stack and the output layer.

        :param units: hidden-state width of each SimpleRNN layer.
        """
        super(MyRNN, self).__init__()
        # transform text to embedding representation
        # [b, 80] => [b, 80, 100]
        self.embedding = tf.keras.layers.Embedding(total_words, embedding_len,
                                                   input_length=max_review_len)
        # [b, 80, 100], h_dim: units
        # Author's note: unroll=True can speed up the RNN.
        # The first layer returns the whole sequence so the second layer has a
        # sequence to consume; the second returns only its final output.
        self.rnn = tf.keras.Sequential([
            tf.keras.layers.SimpleRNN(units, dropout=0.5, return_sequences=True),
            tf.keras.layers.SimpleRNN(units, dropout=0.5)
        ])
        # fc, [b, units] => [b, 1]
        self.out_layer = tf.keras.layers.Dense(1)

    def call(self, inputs, training=None, mask=None):
        """Return the probability that each review is positive.

        net(x) / net(x, training=True): train mode
        net(x, training=False): test mode

        :param inputs: integer token ids, [b, 80]
        :param training: forwarded to the recurrent stack.
        :param mask: unused.
        :return: probabilities, [b, 1]
        """
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)
        # [b, 80, 100] => [b, units]
        # Pass the mode explicitly rather than relying on Keras to infer it.
        x = self.rnn(x, training=training)
        # out: [b, units] => [b, 1]
        x = self.out_layer(x)
        # p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
def main():
    """Build MyRNN with 64 units, train 4 epochs, evaluate, and print wall time.

    The timer starts just before the model is constructed and stops after the
    final evaluation on test_data.
    """
    hidden_units, num_epochs = 64, 4
    started = time.time()

    net = MyRNN(hidden_units)
    net.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )
    net.fit(train_data, epochs=num_epochs, validation_data=test_data, validation_freq=1)
    net.evaluate(test_data)

    # Author's recorded result: accuracy: 0.8107 total time cost: 39.90
    finished = time.time()
    print('total time cost:', finished - started)


if __name__ == '__main__':
    main()
import os

# Set before TensorFlow is imported: the native logger picks this variable up
# when it first emits a message, which normally happens during the import, so
# assigning it afterwards is too late to silence the start-up log lines.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import time
import tensorflow as tf
import numpy as np

assert tf.__version__.startswith('2.')

# This script requires at least one GPU and enables memory growth on the first.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0
tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Fixed seeds for TensorFlow and NumPy.
tf.random.set_seed(22)
np.random.seed(22)

batchsz = 128
# the most frequent words (passed to load_data as num_words)
total_words = 10000
# every review is padded or truncated to this many tokens
max_review_len = 80
# width of each word embedding vector
embedding_len = 100

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
# x_train: [b, 80]
# x_test: [b, 80]
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

# drop_remainder=True keeps every batch at exactly batchsz samples.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.shuffle(1000).batch(batchsz, drop_remainder=True)
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_data = test_data.batch(batchsz, drop_remainder=True)

print('x_train shape', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
class MyRNN(tf.keras.Model):
    """Sentiment classifier built from two hand-unrolled LSTMCell layers.

    Data flow: token ids [b, 80] -> embedding [b, 80, 100] -> two stacked
    LSTM cells stepped over the time axis -> last output [b, units]
    -> Dense(1) -> sigmoid probability [b, 1].
    """

    def __init__(self, units):
        """Create the embedding, the two LSTM cells and the output layer.

        :param units: hidden-state width of each LSTM cell.
        """
        super(MyRNN, self).__init__()
        # Remembered so call() can build zero initial states that match the
        # batch size of whatever input it receives, instead of a fixed one.
        self.units = units
        # transform text to embedding representation
        # [b, 80] => [b, 80, 100]
        self.embedding = tf.keras.layers.Embedding(total_words, embedding_len,
                                                   input_length=max_review_len)
        # [b, 80, 100], h_dim: units
        # two stacked LSTM cells
        self.rnn_cell0 = tf.keras.layers.LSTMCell(units, dropout=0.5)
        self.rnn_cell1 = tf.keras.layers.LSTMCell(units, dropout=0.5)
        # fc, [b, units] => [b, 1]
        self.out_layer = tf.keras.layers.Dense(1)

    def _zero_state(self, batch, dtype):
        """Return the two-tensor all-zero state list handed to one LSTM cell."""
        return [tf.zeros([batch, self.units], dtype=dtype),
                tf.zeros([batch, self.units], dtype=dtype)]

    def call(self, inputs, training=None, mask=None):
        """Return the probability that each review is positive.

        net(x) / net(x, training=True): train mode
        net(x, training=False): test mode

        :param inputs: integer token ids, [b, 80]
        :param training: forwarded to both LSTM cells.
        :param mask: unused.
        :return: probabilities, [b, 1]
        """
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)
        # Zero initial states sized from the actual batch, so the model is not
        # tied to the training batch size.
        batch = tf.shape(x)[0]
        state0 = self._zero_state(batch, x.dtype)
        state1 = self._zero_state(batch, x.dtype)
        # run cell compute
        # [b, 80, 100] => [b, units]
        for word in tf.unstack(x, axis=1):  # word: [b, 100]
            # out0: [b, units]
            out0, state0 = self.rnn_cell0(word, state0, training=training)
            # out1: [b, units]
            out1, state1 = self.rnn_cell1(out0, state1, training=training)
        # Only the output of the last time step is classified.
        # out: [b, units] => [b, 1]
        x = self.out_layer(out1)
        # p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
def main():
    """Build MyRNN with 64 units, train 4 epochs, evaluate, and print wall time.

    The timer starts just before the model is constructed and stops after the
    final evaluation on test_data.
    """
    hidden_units, num_epochs = 64, 4
    started = time.time()

    net = MyRNN(hidden_units)
    net.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
        experimental_run_tf_function=False,
    )
    net.fit(train_data, epochs=num_epochs, validation_data=test_data, validation_freq=1)
    net.evaluate(test_data)

    finished = time.time()
    print('total time cost:', finished - started)


if __name__ == '__main__':
    main()
import os

# Set before TensorFlow is imported: the native logger picks this variable up
# when it first emits a message, which normally happens during the import, so
# assigning it afterwards is too late to silence the start-up log lines.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import time
import tensorflow as tf
import numpy as np

assert tf.__version__.startswith('2.')

# This script requires at least one GPU and enables memory growth on the first.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0
tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Fixed seeds for TensorFlow and NumPy.
tf.random.set_seed(22)
np.random.seed(22)

batchsz = 128
# the most frequent words (passed to load_data as num_words)
total_words = 10000
# every review is padded or truncated to this many tokens
max_review_len = 80
# width of each word embedding vector
embedding_len = 100

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
# x_train: [b, 80]
# x_test: [b, 80]
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

# drop_remainder=True keeps every batch at exactly batchsz samples.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.shuffle(1000).batch(batchsz, drop_remainder=True)
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_data = test_data.batch(batchsz, drop_remainder=True)

print('x_train shape', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
class MyRNN(tf.keras.Model):
    """Sentiment classifier built from two stacked LSTM layers.

    Data flow: token ids [b, 80] -> embedding [b, 80, 100] -> two LSTM
    layers -> [b, units] -> Dense(1) -> sigmoid probability [b, 1].
    """

    def __init__(self, units):
        """Create the embedding, the recurrent stack and the output layer.

        :param units: hidden-state width of each LSTM layer.
        """
        super(MyRNN, self).__init__()
        # transform text to embedding representation
        # [b, 80] => [b, 80, 100]
        self.embedding = tf.keras.layers.Embedding(total_words, embedding_len,
                                                   input_length=max_review_len)
        # [b, 80, 100], h_dim: units
        # The first layer returns the whole sequence so the second layer has a
        # sequence to consume; the second returns only its final output.
        self.rnn = tf.keras.Sequential([
            tf.keras.layers.LSTM(units, dropout=0.5, return_sequences=True),
            tf.keras.layers.LSTM(units, dropout=0.5)
        ])
        # fc, [b, units] => [b, 1]
        self.out_layer = tf.keras.layers.Dense(1)

    def call(self, inputs, training=None, mask=None):
        """Return the probability that each review is positive.

        net(x) / net(x, training=True): train mode
        net(x, training=False): test mode

        :param inputs: integer token ids, [b, 80]
        :param training: forwarded to the recurrent stack.
        :param mask: unused.
        :return: probabilities, [b, 1]
        """
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)
        # [b, 80, 100] => [b, units]
        # Pass the mode explicitly rather than relying on Keras to infer it.
        x = self.rnn(x, training=training)
        # out: [b, units] => [b, 1]
        x = self.out_layer(x)
        # p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
def main():
    """Build MyRNN with 64 units, train 4 epochs, evaluate, and print wall time.

    The timer starts just before the model is constructed and stops after the
    final evaluation on test_data.
    """
    hidden_units, num_epochs = 64, 4
    started = time.time()

    net = MyRNN(hidden_units)
    net.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )
    net.fit(train_data, epochs=num_epochs, validation_data=test_data, validation_freq=1)
    net.evaluate(test_data)

    # Author's recorded result: accuracy: 0.8124 total time cost: 25.03
    finished = time.time()
    print('total time cost:', finished - started)


if __name__ == '__main__':
    main()
# GRU Cell
import os

# Set before TensorFlow is imported: the native logger picks this variable up
# when it first emits a message, which normally happens during the import, so
# assigning it afterwards is too late to silence the start-up log lines.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import time
import tensorflow as tf
import numpy as np

assert tf.__version__.startswith('2.')

# This script requires at least one GPU and enables memory growth on the first.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0
tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Fixed seeds for TensorFlow and NumPy.
tf.random.set_seed(22)
np.random.seed(22)

batchsz = 128
# the most frequent words (passed to load_data as num_words)
total_words = 10000
# every review is padded or truncated to this many tokens
max_review_len = 80
# width of each word embedding vector
embedding_len = 100

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
# x_train: [b, 80]
# x_test: [b, 80]
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

# drop_remainder=True keeps every batch at exactly batchsz samples.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.shuffle(1000).batch(batchsz, drop_remainder=True)
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_data = test_data.batch(batchsz, drop_remainder=True)

print('x_train shape', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
class MyRNN(tf.keras.Model):
    """Sentiment classifier built from two hand-unrolled GRUCell layers.

    Data flow: token ids [b, 80] -> embedding [b, 80, 100] -> two stacked
    GRU cells stepped over the time axis -> last output [b, units]
    -> Dense(1) -> sigmoid probability [b, 1].
    """

    def __init__(self, units):
        """Create the embedding, the two GRU cells and the output layer.

        :param units: hidden-state width of each GRU cell.
        """
        super(MyRNN, self).__init__()
        # Remembered so call() can build zero initial states that match the
        # batch size of whatever input it receives, instead of a fixed one.
        self.units = units
        # transform text to embedding representation
        # [b, 80] => [b, 80, 100]
        self.embedding = tf.keras.layers.Embedding(total_words, embedding_len,
                                                   input_length=max_review_len)
        # [b, 80, 100], h_dim: units
        # two stacked GRU cells
        self.rnn_cell0 = tf.keras.layers.GRUCell(units, dropout=0.5)
        self.rnn_cell1 = tf.keras.layers.GRUCell(units, dropout=0.5)
        # fc, [b, units] => [b, 1]
        self.out_layer = tf.keras.layers.Dense(1)

    def call(self, inputs, training=None, mask=None):
        """Return the probability that each review is positive.

        net(x) / net(x, training=True): train mode
        net(x, training=False): test mode

        :param inputs: integer token ids, [b, 80]
        :param training: forwarded to both GRU cells.
        :param mask: unused.
        :return: probabilities, [b, 1]
        """
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)
        # Zero initial states sized from the actual batch, so the model is not
        # tied to the training batch size.
        batch = tf.shape(x)[0]
        state0 = [tf.zeros([batch, self.units], dtype=x.dtype)]
        state1 = [tf.zeros([batch, self.units], dtype=x.dtype)]
        # run cell compute
        # [b, 80, 100] => [b, units]
        for word in tf.unstack(x, axis=1):  # word: [b, 100]
            # out0: [b, units]
            out0, state0 = self.rnn_cell0(word, state0, training=training)
            # out1: [b, units]
            out1, state1 = self.rnn_cell1(out0, state1, training=training)
        # Only the output of the last time step is classified.
        # out: [b, units] => [b, 1]
        x = self.out_layer(out1)
        # p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
def main():
    """Build MyRNN with 64 units, train 4 epochs, evaluate, and print wall time.

    The timer starts just before the model is constructed and stops after the
    final evaluation on test_data.
    """
    hidden_units, num_epochs = 64, 4
    started = time.time()

    net = MyRNN(hidden_units)
    net.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
        experimental_run_tf_function=False,
    )
    net.fit(train_data, epochs=num_epochs, validation_data=test_data, validation_freq=1)
    net.evaluate(test_data)

    finished = time.time()
    print('total time cost:', finished - started)


if __name__ == '__main__':
    main()
# GRU Layer
import os

# Set before TensorFlow is imported: the native logger picks this variable up
# when it first emits a message, which normally happens during the import, so
# assigning it afterwards is too late to silence the start-up log lines.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import time
import tensorflow as tf
import numpy as np

assert tf.__version__.startswith('2.')

# This script requires at least one GPU and enables memory growth on the first.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0
tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Fixed seeds for TensorFlow and NumPy.
tf.random.set_seed(22)
np.random.seed(22)

batchsz = 128
# the most frequent words (passed to load_data as num_words)
total_words = 10000
# every review is padded or truncated to this many tokens
max_review_len = 80
# width of each word embedding vector
embedding_len = 100

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
# x_train: [b, 80]
# x_test: [b, 80]
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

# drop_remainder=True keeps every batch at exactly batchsz samples.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.shuffle(1000).batch(batchsz, drop_remainder=True)
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_data = test_data.batch(batchsz, drop_remainder=True)

print('x_train shape', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
class MyRNN(tf.keras.Model):
    """Sentiment classifier built from two stacked GRU layers.

    Data flow: token ids [b, 80] -> embedding [b, 80, 100] -> two GRU
    layers -> [b, units] -> Dense(1) -> sigmoid probability [b, 1].
    """

    def __init__(self, units):
        """Create the embedding, the recurrent stack and the output layer.

        :param units: hidden-state width of each GRU layer.
        """
        super(MyRNN, self).__init__()
        # transform text to embedding representation
        # [b, 80] => [b, 80, 100]
        self.embedding = tf.keras.layers.Embedding(total_words, embedding_len,
                                                   input_length=max_review_len)
        # [b, 80, 100], h_dim: units
        # The first layer returns the whole sequence so the second layer has a
        # sequence to consume; the second returns only its final output.
        self.rnn = tf.keras.Sequential([
            tf.keras.layers.GRU(units, dropout=0.5, return_sequences=True),
            tf.keras.layers.GRU(units, dropout=0.5)
        ])
        # fc, [b, units] => [b, 1]
        self.out_layer = tf.keras.layers.Dense(1)

    def call(self, inputs, training=None, mask=None):
        """Return the probability that each review is positive.

        net(x) / net(x, training=True): train mode
        net(x, training=False): test mode

        :param inputs: integer token ids, [b, 80]
        :param training: forwarded to the recurrent stack.
        :param mask: unused.
        :return: probabilities, [b, 1]
        """
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)
        # [b, 80, 100] => [b, units]
        # Pass the mode explicitly rather than relying on Keras to infer it.
        x = self.rnn(x, training=training)
        # out: [b, units] => [b, 1]
        x = self.out_layer(x)
        # p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
def main():
    """Build MyRNN with 64 units, train 4 epochs, evaluate, and print wall time.

    The timer starts just before the model is constructed and stops after the
    final evaluation on test_data.
    """
    hidden_units, num_epochs = 64, 4
    started = time.time()

    net = MyRNN(hidden_units)
    net.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )
    net.fit(train_data, epochs=num_epochs, validation_data=test_data, validation_freq=1)
    net.evaluate(test_data)

    # Author's recorded result: accuracy: 0.8311 total time cost: 26.32
    finished = time.time()
    print('total time cost:', finished - started)


if __name__ == '__main__':
    main()