1. Recurrent Neural Network (LSTM)
Explanation:
# -*- coding: utf-8 -*-
"""
Simple example using an LSTM recurrent neural network to classify the IMDB
sentiment dataset.
"""
from __future__ import division, print_function, absolute_import
import tflearn
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb
# IMDB Dataset loading
train, test, _ = imdb.load_data(path='imdb.pkl', n_words=10000,
                                valid_portion=0.1)
trainX, trainY = train
testX, testY = test
# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=100, value=0.)
testX = pad_sequences(testX, maxlen=100, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY)
testY = to_categorical(testY)
# Network building
net = tflearn.input_data([None, 100])
net = tflearn.embedding(net, input_dim=10000, output_dim=128)
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy')
# Training
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True,
          batch_size=32)
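Once fitting completes, the DNN object can be queried directly. A minimal sketch of evaluating and predicting with the model trained above (the test split is the same data used for validation here):
score = model.evaluate(testX, testY, batch_size=32)
print("Test accuracy: %s" % score)
probs = model.predict(testX[:1])  # softmax scores over the two sentiment classes
print(probs[0])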
2. Bi-Directional RNN (LSTM)
Explanation:
# -*- coding: utf-8 -*-
"""
Simple example using a bidirectional LSTM recurrent neural network to
classify the IMDB sentiment dataset.
"""
from __future__ import division, print_function, absolute_import
import tflearn
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.embedding_ops import embedding
from tflearn.layers.recurrent import bidirectional_rnn, BasicLSTMCell
from tflearn.layers.estimator import regression
# IMDB Dataset loading
train, test, _ = imdb.load_data(path='imdb.pkl', n_words=10000,
                                valid_portion=0.1)
trainX, trainY = train
testX, testY = test
# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=200, value=0.)
testX = pad_sequences(testX, maxlen=200, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY)
testY = to_categorical(testY)
# Network building
net = input_data(shape=[None, 200])
net = embedding(net, input_dim=10000, output_dim=128)  # input_dim matches n_words above
net = bidirectional_rnn(net, BasicLSTMCell(128), BasicLSTMCell(128))
net = dropout(net, 0.5)
net = fully_connected(net, 2, activation='softmax')
net = regression(net, optimizer='adam', loss='categorical_crossentropy')
# Training
model = tflearn.DNN(net, clip_gradients=0., tensorboard_verbose=2)
model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True, batch_size=64)
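Note that bidirectional_rnn returns the forward and backward cell outputs concatenated, so the fully connected layer above sees 256 features rather than 128. A trained TFLearn model can also be persisted and restored; a minimal sketch, with an illustrative filename:
model.save("bilstm_imdb.tfl")  # illustrative filename
model.load("bilstm_imdb.tfl")
print(model.predict(testX[:1]))  # class probabilities for one padded review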
3. Dynamic RNN (LSTM)
Explanation:
# -*- coding: utf-8 -*-
"""
Simple example using a dynamic RNN (LSTM) to classify the IMDB sentiment
dataset. Dynamic computation is performed over sequences of variable length.
"""
from __future__ import division, print_function, absolute_import
import tflearn
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb
# IMDB Dataset loading
train, test, _ = imdb.load_data(path='imdb.pkl', n_words=10000,
                                valid_portion=0.1)
trainX, trainY = train
testX, testY = test
# Data preprocessing
# NOTE: Padding is required for dimension consistency. This will pad sequences
# with 0 at the end until they reach the max sequence length. 0 is used as a
# masking value by dynamic RNNs in TFLearn; a sequence's length is retrieved
# by counting its non-zero elements, and the dynamic RNN step computation is
# then performed up to that length.
trainX = pad_sequences(trainX, maxlen=100, value=0.)
testX = pad_sequences(testX, maxlen=100, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY)
testY = to_categorical(testY)
# Network building
net = tflearn.input_data([None, 100])
# Masking is not required for the embedding: the sequence length is computed
# prior to the embedding op and assigned as a 'seq_length' attribute on the
# returned Tensor.
net = tflearn.embedding(net, input_dim=10000, output_dim=128)
net = tflearn.lstm(net, 128, dropout=0.8, dynamic=True)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy')
# Training
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True,
          batch_size=32)
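As the NOTE above explains, the dynamic RNN recovers each sequence's true length by counting non-zero entries in the padded input. A minimal numpy sketch of that convention:
import numpy as np
batch = np.array([[3, 7, 2, 0, 0],    # true length 3
                  [5, 1, 4, 9, 8]])   # true length 5
print((batch != 0).sum(axis=1))       # -> [3 5]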
4. City Name Generation
Explanation:
from __future__ import absolute_import, division, print_function
import io
import os
from six import moves
import ssl
import tflearn
from tflearn.data_utils import *
path = "US_Cities.txt"
if not os.path.isfile(path):
    # urlretrieve() does not accept an SSL context, so fetch with urlopen()
    # (which does) and write the file out manually.
    context = ssl._create_unverified_context()
    data = moves.urllib.request.urlopen(
        "https://raw.githubusercontent.com/tflearn/tflearn.github.io/master/resources/US_Cities.txt",
        context=context).read()
    with open(path, "wb") as f:
        f.write(data)
maxlen = 20
string_utf8 = io.open(path, "r", encoding="utf-8").read()  # works on Python 2 and 3
X, Y, char_idx = string_to_semi_redundant_sequences(
    string_utf8, seq_maxlen=maxlen, redun_step=3)
g = tflearn.input_data(shape=[None, maxlen, len(char_idx)])
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512)
g = tflearn.dropout(g, 0.5)
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy',
                       learning_rate=0.001)
m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                              seq_maxlen=maxlen,
                              clip_gradients=5.0,
                              checkpoint_path='model_us_cities')
for i in range(40):
    seed = random_sequence_from_string(string_utf8, maxlen)
    m.fit(X, Y, validation_set=0.1, batch_size=128,
          n_epoch=1, run_id='us_cities')
    print("-- TESTING...")
    # (No .encode('utf-8') on the generated strings: on Python 3 that would
    # print bytes literals rather than text.)
    print("-- Test with temperature of 1.2 --")
    print(m.generate(30, temperature=1.2, seq_seed=seed))
    print("-- Test with temperature of 1.0 --")
    print(m.generate(30, temperature=1.0, seq_seed=seed))
    print("-- Test with temperature of 0.5 --")
    print(m.generate(30, temperature=0.5, seq_seed=seed))
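The temperature argument rescales the predicted character distribution before sampling: values above 1.0 flatten it (more surprising output), values below 1.0 sharpen it (more conservative output). A minimal numpy sketch of the usual rescaling trick; the exact implementation inside SequenceGenerator may differ:
import numpy as np
def apply_temperature(probs, temperature):
    # Equivalent to renormalizing p_i ** (1 / T).
    logits = np.log(np.asarray(probs, dtype=np.float64)) / temperature
    scaled = np.exp(logits - logits.max())  # max-subtraction for numerical stability
    return scaled / scaled.sum()
print(apply_temperature([0.7, 0.2, 0.1], 1.2))  # flatter
print(apply_temperature([0.7, 0.2, 0.1], 0.5))  # sharper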
5. Shakespeare Scripts Generation
Explanation:
from __future__ import absolute_import, division, print_function
import os
import pickle
from six.moves import urllib
import tflearn
from tflearn.data_utils import *
path = "shakespeare_input.txt"
char_idx_file = 'char_idx.pickle'
if not os.path.isfile(path):
    urllib.request.urlretrieve(
        "https://raw.githubusercontent.com/tflearn/tflearn.github.io/master/resources/shakespeare_input.txt",
        path)
maxlen = 25
char_idx = None
if os.path.isfile(char_idx_file):
    print('Loading previous char_idx')
    char_idx = pickle.load(open(char_idx_file, 'rb'))
X, Y, char_idx = textfile_to_semi_redundant_sequences(
    path, seq_maxlen=maxlen, redun_step=3, pre_defined_char_idx=char_idx)
pickle.dump(char_idx, open(char_idx_file, 'wb'))
g = tflearn.input_data([None, maxlen, len(char_idx)])
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512)
g = tflearn.dropout(g, 0.5)
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy',
                       learning_rate=0.001)
m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                              seq_maxlen=maxlen,
                              clip_gradients=5.0,
                              checkpoint_path='model_shakespeare')
for i in range(50):
    seed = random_sequence_from_textfile(path, maxlen)
    m.fit(X, Y, validation_set=0.1, batch_size=128,
          n_epoch=1, run_id='shakespeare')
    print("-- TESTING...")
    print("-- Test with temperature of 1.0 --")
    print(m.generate(600, temperature=1.0, seq_seed=seed))
    print("-- Test with temperature of 0.5 --")
    print(m.generate(600, temperature=0.5, seq_seed=seed))
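The char_idx mapping is pickled above so that a later run rebuilds the network with exactly the same character-to-index table; restored weights are only meaningful against that same mapping. A minimal sketch of reusing it (the checkpoint name is illustrative, since TFLearn appends the training step to checkpoint_path):
import pickle
char_idx = pickle.load(open('char_idx.pickle', 'rb'))  # same mapping as at training time
# Rebuild the generator exactly as above with len(char_idx) units, then:
# m.load('model_shakespeare-48000')  # illustrative checkpoint filename
# print(m.generate(600, temperature=0.5, seq_seed=seed))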
6. Seq2seq
Explanation:
'''
A pedagogical implementation of seq2seq recurrent neural networks, using TensorFlow and TFLearn.
More info at https://github.com/ichuang/tflearn_seq2seq
'''
from __future__ import division, print_function
import os
import sys
import tflearn
import argparse
import json
import numpy as np
import tensorflow as tf
from tensorflow.contrib.legacy_seq2seq.python.ops import seq2seq
from tensorflow.python.ops import rnn_cell
# -----------------------------------------------------------------------------
class SequencePattern(object):
    INPUT_SEQUENCE_LENGTH = 10
    OUTPUT_SEQUENCE_LENGTH = 10
    INPUT_MAX_INT = 9
    OUTPUT_MAX_INT = 9
    PATTERN_NAME = "sorted"

    def __init__(self, name=None, in_seq_len=None, out_seq_len=None):
        if name is not None:
            assert hasattr(self, "%s_sequence" % name)
            self.PATTERN_NAME = name
        if in_seq_len:
            self.INPUT_SEQUENCE_LENGTH = in_seq_len
        if out_seq_len:
            self.OUTPUT_SEQUENCE_LENGTH = out_seq_len

    def generate_output_sequence(self, x):
        '''
        For a given input sequence, generate the output sequence. x is a 1D numpy array
        of integers, with length INPUT_SEQUENCE_LENGTH.
        Returns a 1D numpy array of length OUTPUT_SEQUENCE_LENGTH.
        This procedure defines the pattern which the seq2seq RNN will be trained to find.
        '''
        return getattr(self, "%s_sequence" % self.PATTERN_NAME)(x)

    def maxmin_dup_sequence(self, x):
        '''
        Generate a sequence with [max, min, rest of the original entries].
        '''
        x = np.array(x)
        y = [x.max(), x.min()] + list(x[2:])
        return np.array(y)[:self.OUTPUT_SEQUENCE_LENGTH]  # truncate at out seq len

    def sorted_sequence(self, x):
        '''
        Generate a sorted version of the original sequence.
        '''
        return np.array(sorted(x))[:self.OUTPUT_SEQUENCE_LENGTH]

    def reversed_sequence(self, x):
        '''
        Generate a reversed version of the original sequence.
        '''
        return np.array(x[::-1])[:self.OUTPUT_SEQUENCE_LENGTH]
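# Example: with the default "sorted" pattern (cf. test_sp1 below),
#   SequencePattern("sorted").generate_output_sequence([5, 6, 1, 2, 9])
# returns array([1, 2, 5, 6, 9]).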
# -----------------------------------------------------------------------------
class TFLearnSeq2Seq(object):
    '''
    seq2seq recurrent neural network, implemented using TFLearn.
    '''
    AVAILABLE_MODELS = ["embedding_rnn", "embedding_attention"]

    def __init__(self, sequence_pattern, seq2seq_model=None, verbose=None, name=None, data_dir=None):
        '''
        sequence_pattern = a SequencePattern instance, which defines pattern parameters
                           (input / output lengths, name, generating function)
        seq2seq_model = string specifying which seq2seq model to use, e.g. "embedding_rnn"
        '''
        self.sequence_pattern = sequence_pattern
        self.seq2seq_model = seq2seq_model or "embedding_rnn"
        assert self.seq2seq_model in self.AVAILABLE_MODELS
        self.in_seq_len = self.sequence_pattern.INPUT_SEQUENCE_LENGTH
        self.out_seq_len = self.sequence_pattern.OUTPUT_SEQUENCE_LENGTH
        self.in_max_int = self.sequence_pattern.INPUT_MAX_INT
        self.out_max_int = self.sequence_pattern.OUTPUT_MAX_INT
        self.verbose = verbose or 0
        self.n_input_symbols = self.in_max_int + 1
        self.n_output_symbols = self.out_max_int + 2  # extra one for the GO symbol
        self.model_instance = None
        self.name = name
        self.data_dir = data_dir
    def generate_training_data(self, num_points):
        '''
        Generate training dataset. Produce random (integer) sequences X, and corresponding
        expected output sequences Y = generate_output_sequence(X).
        Return xy_data, y_data (both of type uint32):
        xy_data = numpy array of shape [num_points, in_seq_len + out_seq_len], with each point being X + Y
        y_data = numpy array of shape [num_points, out_seq_len]
        '''
        # randint's upper bound is exclusive, hence the +1 so that in_max_int occurs too
        x_data = np.random.randint(0, self.in_max_int + 1,
                                   size=(num_points, self.in_seq_len))  # shape [num_points, in_seq_len]
        x_data = x_data.astype(np.uint32)  # ensure integer type
        y_data = [self.sequence_pattern.generate_output_sequence(x) for x in x_data]
        y_data = np.array(y_data)
        xy_data = np.append(x_data, y_data, axis=1)  # shape [num_points, 2*seq_len]
        return xy_data, y_data
    def sequence_loss(self, y_pred, y_true):
        '''
        Loss function for the seq2seq RNN. Reshape predicted and true (label) tensors, generate dummy weights,
        then use seq2seq.sequence_loss to actually compute the loss function.
        '''
        if self.verbose > 2:
            print("my_sequence_loss y_pred=%s, y_true=%s" % (y_pred, y_true))
        logits = tf.unstack(y_pred, axis=1)  # list of [-1, num_decoder_symbols] elements
        # y_true has shape [-1, self.out_seq_len]; unpack to a list of self.out_seq_len [-1] elements
        targets = tf.unstack(y_true, axis=1)
        if self.verbose > 2:
            print("my_sequence_loss logits=%s" % (logits,))
            print("my_sequence_loss targets=%s" % (targets,))
        weights = [tf.ones_like(yp, dtype=tf.float32) for yp in targets]
        if self.verbose > 4:
            print("my_sequence_loss weights=%s" % (weights,))
        sl = seq2seq.sequence_loss(logits, targets, weights)
        if self.verbose > 2:
            print("my_sequence_loss return = %s" % sl)
        return sl
    def accuracy(self, y_pred, y_true, x_in):
        '''
        Compute accuracy of the prediction, based on the true labels, as the average
        number of equal values. y_pred is [-1, self.out_seq_len, num_decoder_symbols];
        y_true is [-1, self.out_seq_len].
        '''
        pred_idx = tf.to_int32(tf.argmax(y_pred, 2))  # [-1, self.out_seq_len]
        if self.verbose > 2:
            print("my_accuracy pred_idx = %s" % pred_idx)
        accuracy = tf.reduce_mean(tf.cast(tf.equal(pred_idx, y_true), tf.float32), name='acc')
        return accuracy
def model(self, mode="train", num_layers=1, cell_size=32, cell_type="BasicLSTMCell", embedding_size=20,
learning_rate=0.0001,
tensorboard_verbose=0, checkpoint_path=None):
'''
Build tensor specifying graph of operations for the seq2seq neural network model.
mode = string, either "train" or "predict"
cell_type = attribute of rnn_cell specifying which RNN cell type to use
cell_size = size for the hidden layer in the RNN cell
num_layers = number of RNN cell layers to use
Return TFLearn model instance. Use DNN model for this.
'''
assert mode in ["train", "predict"]
checkpoint_path = checkpoint_path or (
"%s%ss2s_checkpoint.tfl" % (self.data_dir or "", "/" if self.data_dir else ""))
GO_VALUE = self.out_max_int + 1 # unique integer value used to trigger decoder outputs in the seq2seq RNN
network = tflearn.input_data(shape=[None, self.in_seq_len + self.out_seq_len], dtype=tf.int32, name="XY")
encoder_inputs = tf.slice(network, [0, 0], [-1, self.in_seq_len], name="enc_in") # get encoder inputs
encoder_inputs = tf.unstack(encoder_inputs,
axis=1) # transform into list of self.in_seq_len elements, each [-1]
decoder_inputs = tf.slice(network, [0, self.in_seq_len], [-1, self.out_seq_len],
name="dec_in") # get decoder inputs
decoder_inputs = tf.unstack(decoder_inputs,
axis=1) # transform into list of self.out_seq_len elements, each [-1]
go_input = tf.multiply(tf.ones_like(decoder_inputs[0], dtype=tf.int32),
GO_VALUE) # insert "GO" symbol as the first decoder input; drop the last decoder input
decoder_inputs = [go_input] + decoder_inputs[
: self.out_seq_len - 1] # insert GO as first; drop last decoder input
feed_previous = not (mode == "train")
if self.verbose > 3:
print("feed_previous = %s" % str(feed_previous))
print("encoder inputs: %s" % str(encoder_inputs))
print("decoder inputs: %s" % str(decoder_inputs))
print("len decoder inputs: %s" % len(decoder_inputs))
self.n_input_symbols = self.in_max_int + 1 # default is integers from 0 to 9
self.n_output_symbols = self.out_max_int + 2 # extra "GO" symbol for decoder inputs
single_cell = getattr(rnn_cell, cell_type)(cell_size, state_is_tuple=True)
if num_layers == 1:
cell = single_cell
else:
cell = rnn_cell.MultiRNNCell([single_cell] * num_layers)
if self.seq2seq_model == "embedding_rnn":
model_outputs, states = seq2seq.embedding_rnn_seq2seq(encoder_inputs,
# encoder_inputs: A list of 2D Tensors [batch_size, input_size].
decoder_inputs,
cell,
num_encoder_symbols=self.n_input_symbols,
num_decoder_symbols=self.n_output_symbols,
embedding_size=embedding_size,
feed_previous=feed_previous)
elif self.seq2seq_model == "embedding_attention":
model_outputs, states = seq2seq.embedding_attention_seq2seq(encoder_inputs,
# encoder_inputs: A list of 2D Tensors [batch_size, input_size].
decoder_inputs,
cell,
num_encoder_symbols=self.n_input_symbols,
num_decoder_symbols=self.n_output_symbols,
embedding_size=embedding_size,
num_heads=1,
initial_state_attention=False,
feed_previous=feed_previous)
else:
raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' % self.seq2seq_model)
tf.add_to_collection(tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model",
model_outputs) # for TFLearn to know what to save and restore
# model_outputs: list of the same length as decoder_inputs of 2D Tensors with shape [batch_size x output_size] containing the generated outputs.
if self.verbose > 2: print("model outputs: %s" % model_outputs)
network = tf.stack(model_outputs,
axis=1) # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]
if self.verbose > 2: print("packed model outputs: %s" % network)
if self.verbose > 3:
all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
print("all_vars = %s" % all_vars)
with tf.name_scope("TargetsData"): # placeholder for target variable (i.e. trainY input)
targetY = tf.placeholder(shape=[None, self.out_seq_len], dtype=tf.int32, name="Y")
network = tflearn.regression(network,
placeholder=targetY,
optimizer='adam',
learning_rate=learning_rate,
loss=self.sequence_loss,
metric=self.accuracy,
name="Y")
model = tflearn.DNN(network, tensorboard_verbose=tensorboard_verbose, checkpoint_path=checkpoint_path)
return model
    def train(self, num_epochs=20, num_points=100000, model=None, model_params=None, weights_input_fn=None,
              validation_set=0.1, snapshot_step=5000, batch_size=128, weights_output_fn=None):
        '''
        Train the model, with the specified number of epochs and dataset size.
        Use the specified model, or create one if not provided. Load initial weights from
        file weights_input_fn, if provided. validation_set specifies what to use for validation.
        Returns the trained model instance.
        '''
        trainXY, trainY = self.generate_training_data(num_points)
        print("[TFLearnSeq2Seq] Training on %d point dataset (pattern '%s'), with %d epochs" %
              (num_points, self.sequence_pattern.PATTERN_NAME, num_epochs))
        if self.verbose > 1:
            print("  model parameters: %s" % json.dumps(model_params, indent=4))
        model_params = model_params or {}
        model = model or self.setup_model("train", model_params, weights_input_fn)
        model.fit(trainXY, trainY,
                  n_epoch=num_epochs,
                  validation_set=validation_set,
                  batch_size=batch_size,
                  shuffle=True,
                  show_metric=True,
                  snapshot_step=snapshot_step,
                  snapshot_epoch=False,
                  run_id="TFLearnSeq2Seq")
        print("Done!")
        if weights_output_fn is not None:
            weights_output_fn = self.canonical_weights_fn(weights_output_fn)
            model.save(weights_output_fn)
            print("Saved %s" % weights_output_fn)
            self.weights_output_fn = weights_output_fn
        return model
    def canonical_weights_fn(self, iteration_num=0):
        '''
        Construct canonical weights filename, based on model and pattern names.
        '''
        if not type(iteration_num) == int:
            try:
                iteration_num = int(iteration_num)
            except Exception:
                return iteration_num
        model_name = self.name or "basic"
        wfn = "ts2s__%s__%s_%s.tfl" % (model_name, self.sequence_pattern.PATTERN_NAME, iteration_num)
        if self.data_dir:
            wfn = "%s/%s" % (self.data_dir, wfn)
        self.weights_filename = wfn
        return wfn
    def setup_model(self, mode, model_params=None, weights_input_fn=None):
        '''
        Set up a model instance, using the specified mode and model parameters.
        Load the weights from the specified file, if it exists.
        If weights_input_fn is an integer, use it, together with the model name and
        the pattern name, to construct a canonical filename.
        '''
        model_params = model_params or {}
        model = self.model_instance or self.model(mode=mode, **model_params)
        self.model_instance = model
        if weights_input_fn:
            if type(weights_input_fn) == int:
                weights_input_fn = self.canonical_weights_fn(weights_input_fn)
            if os.path.exists(weights_input_fn):
                model.load(weights_input_fn)
                print("[TFLearnSeq2Seq] model weights loaded from %s" % weights_input_fn)
            else:
                print("[TFLearnSeq2Seq] MISSING model weights file %s" % weights_input_fn)
        return model
    def predict(self, Xin, model=None, model_params=None, weights_input_fn=None):
        '''
        Make a prediction, using the seq2seq model, for the given input sequence Xin.
        If model is not provided, create one (or use the last created instance).
        Return prediction, y:
        prediction = array of integers, giving the output prediction. Length = out_seq_len.
        y = array of shape [out_seq_len, n_output_symbols], giving logits for the output prediction.
        '''
        if not model:
            model = self.model_instance or self.setup_model("predict", model_params, weights_input_fn)
        if self.verbose:
            print("Xin = %s" % str(Xin))
        X = np.array(Xin).astype(np.uint32)
        assert len(X) == self.in_seq_len
        if self.verbose:
            print("X Input shape=%s, data=%s" % (X.shape, X))
            print("Expected output = %s" % str(self.sequence_pattern.generate_output_sequence(X)))
        Yin = [0] * self.out_seq_len  # dummy decoder inputs; ignored when feed_previous=True
        XY = np.append(X, np.array(Yin).astype(np.uint32))
        XY = XY.reshape([-1, self.in_seq_len + self.out_seq_len])  # batch size 1
        if self.verbose > 1:
            print("XY Input shape=%s, data=%s" % (XY.shape, XY))
        res = model.predict(XY)
        res = np.array(res)
        if self.verbose > 1:
            print("prediction shape = %s" % str(res.shape))
        y = res.reshape(self.out_seq_len, self.n_output_symbols)
        prediction = np.argmax(y, axis=1)
        if self.verbose:
            print("Predicted output sequence: %s" % str(prediction))
        return prediction, y
# -----------------------------------------------------------------------------
class VAction(argparse.Action):
    def __call__(self, parser, args, values, option_string=None):
        curval = getattr(args, self.dest, 0) or 0
        values = values.count('v') + 1
        setattr(args, self.dest, values + curval)
# -----------------------------------------------------------------------------
def CommandLine(args=None, arglist=None):
    '''
    Main command line. Accepts args, to allow for simple unit testing.
    '''
    help_text = """
Commands:
  train - give size of training set to use, as argument
  predict - give input sequence as argument (or specify inputs via --from-file <filename>)
"""
    parser = argparse.ArgumentParser(description=help_text, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("cmd", help="command")
    parser.add_argument("cmd_input", nargs='*', help="input to command")
    parser.add_argument('-v', "--verbose", nargs=0,
                        help="increase output verbosity (add more -v to increase verbosity)",
                        action=VAction, dest='verbose')
    parser.add_argument("-m", "--model",
                        help="seq2seq model name: either embedding_rnn (default) or embedding_attention",
                        default=None)
    parser.add_argument("-r", "--learning-rate", type=float, help="learning rate (default 0.0001)", default=0.0001)
    parser.add_argument("-e", "--epochs", type=int, help="number of training epochs", default=10)
    parser.add_argument("-i", "--input-weights", type=str, help="tflearn file with network weights to load",
                        default=None)
    parser.add_argument("-o", "--output-weights", type=str,
                        help="new tflearn file where network weights are to be saved", default=None)
    parser.add_argument("-p", "--pattern-name", type=str, help="name of pattern to use for sequence", default=None)
    parser.add_argument("-n", "--name", type=str, help="name of model, used when generating default weights filenames",
                        default=None)
    parser.add_argument("--in-len", type=int, help="input sequence length (default 10)", default=None)
    parser.add_argument("--out-len", type=int, help="output sequence length (default 10)", default=None)
    parser.add_argument("--from-file", type=str, help="name of file to take input data sequences from (json format)",
                        default=None)
    parser.add_argument("--iter-num", type=int,
                        help="training iteration number; specify instead of input- or output-weights to use generated filenames",
                        default=None)
    parser.add_argument("--data-dir",
                        help="directory to use for storing checkpoints (also used when generating default weights filenames)",
                        default=None)
    # model parameters
    parser.add_argument("-L", "--num-layers", type=int, help="number of RNN layers to use in the model (default 1)",
                        default=1)
    parser.add_argument("--cell-size", type=int, help="size of RNN cell to use (default 32)", default=32)
    parser.add_argument("--cell-type", type=str, help="type of RNN cell to use (default BasicLSTMCell)",
                        default="BasicLSTMCell")
    parser.add_argument("--embedding-size", type=int, help="size of embedding to use (default 20)", default=20)
    parser.add_argument("--tensorboard-verbose", type=int, help="tensorboard verbosity level (default 0)", default=0)
    if not args:
        args = parser.parse_args(arglist)
    if args.iter_num is not None:
        args.input_weights = args.iter_num
        args.output_weights = args.iter_num + 1
    model_params = dict(num_layers=args.num_layers,
                        cell_size=args.cell_size,
                        cell_type=args.cell_type,
                        embedding_size=args.embedding_size,
                        learning_rate=args.learning_rate,
                        tensorboard_verbose=args.tensorboard_verbose)
    if args.cmd == "train":
        try:
            num_points = int(args.cmd_input[0])
        except Exception:
            raise Exception("Please specify the number of datapoints to use for training, as the first argument")
        sp = SequencePattern(args.pattern_name, in_seq_len=args.in_len, out_seq_len=args.out_len)
        ts2s = TFLearnSeq2Seq(sp, seq2seq_model=args.model, data_dir=args.data_dir, name=args.name,
                              verbose=args.verbose)
        ts2s.train(num_epochs=args.epochs, num_points=num_points, weights_output_fn=args.output_weights,
                   weights_input_fn=args.input_weights, model_params=model_params)
        return ts2s
    elif args.cmd == "predict":
        if args.from_file:
            # read the input sequences from a JSON file, per the --from-file help text
            inputs = json.load(open(args.from_file))
        else:
            try:
                input_x = list(map(int, args.cmd_input))  # list() so it also works on Python 3
                inputs = [input_x]
            except Exception:
                raise Exception("Please provide a space-delimited input sequence as the argument")
        sp = SequencePattern(args.pattern_name, in_seq_len=args.in_len, out_seq_len=args.out_len)
        ts2s = TFLearnSeq2Seq(sp, seq2seq_model=args.model, data_dir=args.data_dir, name=args.name,
                              verbose=args.verbose)
        results = []
        for x in inputs:
            prediction, y = ts2s.predict(x, weights_input_fn=args.input_weights, model_params=model_params)
            print("==> For input %s, prediction=%s (expected=%s)" % (x, prediction, sp.generate_output_sequence(x)))
            results.append([prediction, y])
        ts2s.prediction_results = results
        return ts2s
    else:
        print("Unknown command %s" % args.cmd)
# -----------------------------------------------------------------------------
# unit tests
def test_sp1():
    '''
    Test three different SequencePattern instances.
    '''
    sp = SequencePattern("maxmin_dup")
    y = sp.generate_output_sequence(range(10))
    assert all(y == np.array([9, 0, 2, 3, 4, 5, 6, 7, 8, 9]))
    sp = SequencePattern("sorted")
    y = sp.generate_output_sequence([5, 6, 1, 2, 9])
    assert all(y == np.array([1, 2, 5, 6, 9]))
    sp = SequencePattern("reversed")
    y = sp.generate_output_sequence(range(10))
    assert all(y == np.array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0]))
def test_sp2():
    '''
    Test a SequencePattern instance with lengths different from the default.
    '''
    sp = SequencePattern("sorted", in_seq_len=20, out_seq_len=5)
    x = np.random.randint(0, 9, 20)
    y = sp.generate_output_sequence(x)
    assert len(y) == 5
    y_exp = sorted(x)[:5]
    assert all(y == y_exp)
def test_train1():
    '''
    Test simple training of an embedding_rnn seq2seq model.
    '''
    sp = SequencePattern()
    ts2s = TFLearnSeq2Seq(sp)
    ofn = "test_%s" % ts2s.canonical_weights_fn(0)
    print("using weights filename %s" % ofn)
    if os.path.exists(ofn):
        os.unlink(ofn)
    tf.reset_default_graph()
    ts2s.train(num_epochs=1, num_points=10000, weights_output_fn=ofn)
    assert os.path.exists(ofn)
def test_predict1():
    '''
    Test simple predictions using weights just produced (in test_train1).
    '''
    sp = SequencePattern()
    ts2s = TFLearnSeq2Seq(sp, verbose=1)
    wfn = "test_%s" % ts2s.canonical_weights_fn(0)
    print("using weights filename %s" % wfn)
    tf.reset_default_graph()
    prediction, y = ts2s.predict(Xin=range(10), weights_input_fn=wfn)
    assert len(prediction) == 10
def test_train_predict2():
    '''
    Test that the embedding_attention model works, with saving and loading of weights.
    '''
    import tempfile
    sp = SequencePattern()
    tempdir = tempfile.mkdtemp()
    ts2s = TFLearnSeq2Seq(sp, seq2seq_model="embedding_attention", data_dir=tempdir, name="attention")
    tf.reset_default_graph()
    ts2s.train(num_epochs=1, num_points=1000, weights_output_fn=1, weights_input_fn=0)
    assert os.path.exists(ts2s.weights_output_fn)
    tf.reset_default_graph()
    # reload from the same data_dir the weights were saved to
    ts2s = TFLearnSeq2Seq(sp, seq2seq_model="embedding_attention", data_dir=tempdir, name="attention", verbose=1)
    prediction, y = ts2s.predict(Xin=range(10), weights_input_fn=1)
    assert len(prediction) == 10
    os.system("rm -rf %s" % tempdir)
def test_train_predict3():
    '''
    Test that a model trained on sequences of one length can be used for
    predictions on other sequence lengths.
    '''
    import tempfile
    sp = SequencePattern("sorted", in_seq_len=10, out_seq_len=10)
    tempdir = tempfile.mkdtemp()
    ts2s = TFLearnSeq2Seq(sp, seq2seq_model="embedding_attention", data_dir=tempdir, name="attention")
    tf.reset_default_graph()
    ts2s.train(num_epochs=1, num_points=1000, weights_output_fn=1, weights_input_fn=0)
    assert os.path.exists(ts2s.weights_output_fn)
    tf.reset_default_graph()
    sp = SequencePattern("sorted", in_seq_len=20, out_seq_len=8)
    ts2s = TFLearnSeq2Seq(sp, seq2seq_model="embedding_attention", data_dir=tempdir, name="attention", verbose=1)
    x = np.random.randint(0, 9, 20)
    prediction, y = ts2s.predict(x, weights_input_fn=1)
    assert len(prediction) == 8
    os.system("rm -rf %s" % tempdir)
def test_main1():
    '''
    Integration test - training.
    '''
    import tempfile
    tempdir = tempfile.mkdtemp()
    arglist = "--data-dir %s -e 2 --iter-num=1 -v -v --tensorboard-verbose=1 train 5000" % tempdir
    arglist = arglist.split(' ')
    tf.reset_default_graph()
    ts2s = CommandLine(arglist=arglist)
    assert os.path.exists(ts2s.weights_output_fn)
    os.system("rm -rf %s" % tempdir)
def test_main2():
    '''
    Integration test - training then prediction.
    '''
    import tempfile
    tempdir = tempfile.mkdtemp()
    arglist = "--data-dir %s -e 2 --iter-num=1 -v -v --tensorboard-verbose=1 train 5000" % tempdir
    arglist = arglist.split(' ')
    tf.reset_default_graph()
    ts2s = CommandLine(arglist=arglist)
    wfn = ts2s.weights_output_fn
    assert os.path.exists(wfn)
    arglist = "-i %s predict 1 2 3 4 5 6 7 8 9 0" % wfn
    arglist = arglist.split(' ')
    tf.reset_default_graph()
    ts2s = CommandLine(arglist=arglist)
    assert len(ts2s.prediction_results[0][0]) == 10
    os.system("rm -rf %s" % tempdir)
def test_main3():
    '''
    Integration test - training then prediction: attention model.
    '''
    wfn = "tmp_weights.tfl"
    if os.path.exists(wfn):
        os.unlink(wfn)
    arglist = "-e 2 -o tmp_weights.tfl -v -v -v -v -m embedding_attention train 5000"
    arglist = arglist.split(' ')
    tf.reset_default_graph()
    ts2s = CommandLine(arglist=arglist)
    assert os.path.exists(wfn)
    arglist = "-i tmp_weights.tfl -v -v -v -v -m embedding_attention predict 1 2 3 4 5 6 7 8 9 0"
    arglist = arglist.split(' ')
    tf.reset_default_graph()
    ts2s = CommandLine(arglist=arglist)
    assert len(ts2s.prediction_results[0][0]) == 10
# -----------------------------------------------------------------------------
if __name__ == "__main__":
CommandLine()
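The integration tests above double as usage documentation. Assuming the script is saved as tflearn_seq2seq.py (the filename is an assumption; see the project URL in the module docstring), a typical train-then-predict session mirrors test_main3:
python tflearn_seq2seq.py -e 2 -o tmp_weights.tfl -m embedding_attention train 5000
python tflearn_seq2seq.py -i tmp_weights.tfl -m embedding_attention predict 1 2 3 4 5 6 7 8 9 0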
7. CNN Seq
Explanation:
# -*- coding: utf-8 -*-
"""
Simple example using a convolutional neural network to classify the IMDB
sentiment dataset.
"""
from __future__ import division, print_function, absolute_import
import tensorflow as tf
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_1d, global_max_pool
from tflearn.layers.merge_ops import merge
from tflearn.layers.estimator import regression
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb
# IMDB Dataset loading
train, test, _ = imdb.load_data(path='imdb.pkl', n_words=10000,
                                valid_portion=0.1)
trainX, trainY = train
testX, testY = test
# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=100, value=0.)
testX = pad_sequences(testX, maxlen=100, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY)
testY = to_categorical(testY)
# Building convolutional network
network = input_data(shape=[None, 100], name='input')
network = tflearn.embedding(network, input_dim=10000, output_dim=128)
branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
network = merge([branch1, branch2, branch3], mode='concat', axis=1)
network = tf.expand_dims(network, 2)
network = global_max_pool(network)
network = dropout(network, 0.5)
network = fully_connected(network, 2, activation='softmax')
network = regression(network, optimizer='adam', learning_rate=0.001,
                     loss='categorical_crossentropy', name='target')
# Training
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit(trainX, trainY, n_epoch=5, shuffle=True, validation_set=(testX, testY), show_metric=True, batch_size=32)
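The three conv_1d branches act as detectors for 3-, 4- and 5-token n-grams, and global max pooling keeps each filter's strongest response wherever it occurred in the review. With maxlen=100 and 'valid' padding the branches produce 98, 97 and 96 time steps, which merge(axis=1) concatenates to 291; a small numpy sketch of the pooling step:
import numpy as np
feature_maps = np.random.rand(1, 291, 128)  # (batch, concatenated steps, filters)
pooled = feature_maps.max(axis=1)           # global max pool -> shape (1, 128)
print(pooled.shape)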
References:
[1] TFLearn Examples: https://github.com/tflearn/tflearn/tree/master/examples