依据论文:ANOMALY DETECTION FOR TIME SERIES USING VAE-LSTM HYBRID MODEL(可在IEEE上自行寻找)
代码来源:github
运行环境:gpu
VAE-LSTM原理图:(原理图未能随文显示,模型结构图可参见论文原文)
以下可针对自己的需求进行适当更改。
该模型的原理可以简单理解为当数据输入时,先由VAE的编码器网络对输入数据进行压缩,并做特征提取,将提取到的特征输入LSTM网络进行故障检测或分类,并对特征进行归类预测,将预测得到的结果输入VAE解码器网络,进行重构,并计算重构损失,更新整体网络参数。VAE与LSTM二者结合,进一步提高模型诊断精度。(详细原理阐述可参见论文原文)
相关程序由6部分组成,一个训练主程序,5个支持子程序。(在加载数据阶段,分好训练集,交叉验证集与测试集)
训练主程序如下:
train.py
import os
import tensorflow as tf
from data_loader import DataGenerator
from model import VAEmodel, lstmKerasModel
from trainer import vaeTrainer
from utils import process_config, create_dirs, get_args, save_config
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def main():
    """Entry point: train the VAE on rolling windows, then optionally train the
    LSTM on the VAE embeddings and visualise its predictions."""
    # capture the config path from the run arguments
    # then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)
    except Exception as err:
        # BUGFIX: was a bare `except: print(...); exit(0)` which swallowed every
        # error (hiding the real cause) and reported success to the shell.
        print("missing or invalid arguments: {}".format(err))
        raise SystemExit(1)
    # create the experiments dirs
    create_dirs([config['result_dir'], config['checkpoint_dir'], config['checkpoint_dir_lstm']])
    # save the config in a txt file
    save_config(config)
    # create tensorflow session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    # create your data generator
    data = DataGenerator(config)
    # create the VAE model
    model_vae = VAEmodel(config)
    # create a trainer for VAE model
    trainer_vae = vaeTrainer(sess, model_vae, data, config)
    # restore the latest VAE checkpoint if one exists
    model_vae.load(sess)
    # here you train your model
    if config['TRAIN_VAE']:
        if config['num_epochs_vae'] > 0:
            trainer_vae.train()
    if config['TRAIN_LSTM']:
        # create a lstm model class instance
        lstm_model = lstmKerasModel(data)
        # produce the embedding of all sequences for training of lstm model
        # process the windows in sequence to get their VAE embeddings
        lstm_model.produce_embeddings(config, model_vae, data, sess)
        # Create a basic model instance
        lstm_nn_model = lstm_model.create_lstm_model(config)
        lstm_nn_model.summary()  # Display the model's architecture
        # checkpoint path
        checkpoint_path = config['checkpoint_dir_lstm'] + "cp.ckpt"
        # Create a callback that saves the model's weights
        cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                         save_weights_only=True,
                                                         verbose=1)
        # load weights if possible
        lstm_model.load_model(lstm_nn_model, config, checkpoint_path)
        # start training
        if config['num_epochs_lstm'] > 0:
            lstm_model.train(config, lstm_nn_model, cp_callback)
        # make a prediction on the test set using the trained model
        lstm_embedding = lstm_nn_model.predict(lstm_model.x_test, batch_size=config['batch_size_lstm'])
        print(lstm_embedding.shape)
        # visualise the first 10 test sequences
        for i in range(10):
            lstm_model.plot_lstm_embedding_prediction(i, config, model_vae, sess, data, lstm_embedding)


if __name__ == '__main__':
    main()
五个支持程序如下:
base.py,做基本类支持
import tensorflow as tf
import tensorflow_probability as tfp
import random
import numpy as np
import time
import matplotlib.pylab as plt
from matplotlib.pyplot import plot, savefig, figure
from utils import count_trainable_variables
tfd = tfp.distributions
class BaseDataGenerator:
    """Base class for data generators: stores the config and offers a random
    90/10 train/validation index split over a number of windows."""

    def __init__(self, config):
        self.config = config

    def separate_train_and_val_set(self, n_win):
        """Randomly partition window indices 0..n_win-1 into train and validation.

        :param n_win: total number of windows
        :return: (train indices, val indices, n_train, n_val)
        """
        n_train = int(np.floor(n_win * 0.9))
        n_val = n_win - n_train
        idx_train = random.sample(range(n_win), n_train)
        # validation set is everything that was not drawn for training
        idx_val = list(set(range(n_win)) - set(idx_train))
        return idx_train, idx_val, n_train, n_val
class BaseModel:
    """Base class for the TF1 graph models.

    Provides epoch/global-step counters, checkpoint save/load, and the VAE
    ELBO loss / optimiser plumbing. Subclasses must define, before calling
    define_loss/training_variables/init_saver: `original_signal`, `decoded`,
    `code_mean`, `code_std_dev`, `sigma2` and `input_dims`.
    """

    def __init__(self, config):
        self.config = config
        # init the global step
        self.init_global_step()
        # init the epoch counter
        self.init_cur_epoch()
        self.two_pi = tf.constant(2 * np.pi)

    # save function that saves the checkpoint in the path defined in the config file
    def save(self, sess):
        print("Saving model...")
        # NOTE(review): the save prefix is the checkpoint *directory* string
        # itself, so files are written as "<checkpoint_dir>-<step>.*" — confirm
        # this is what tf.train.latest_checkpoint() in load() expects to find.
        self.saver.save(sess, self.config['checkpoint_dir'],
                        self.global_step_tensor)
        print("Model saved.")

    # load latest checkpoint from the experiment path defined in the config file
    def load(self, sess):
        print("checkpoint_dir at loading: {}".format(self.config['checkpoint_dir']))
        latest_checkpoint = tf.train.latest_checkpoint(self.config['checkpoint_dir'])
        if latest_checkpoint:
            print("Loading model checkpoint {} ...\n".format(latest_checkpoint))
            self.saver.restore(sess, latest_checkpoint)
            print("Model loaded.")
        else:
            print("No model loaded.")

    # initialize a tensorflow variable to use it as epoch counter
    def init_cur_epoch(self):
        with tf.variable_scope('cur_epoch'):
            self.cur_epoch_tensor = tf.Variable(0, trainable=False, name='cur_epoch')
            self.increment_cur_epoch_tensor = tf.assign(self.cur_epoch_tensor, self.cur_epoch_tensor + 1)

    # just initialize a tensorflow variable to use it as global step counter
    def init_global_step(self):
        # DON'T forget to add the global step tensor to the tensorflow trainer
        with tf.variable_scope('global_step'):
            self.global_step_tensor = tf.Variable(0, trainable=False, name='global_step')
            self.increment_global_step_tensor = tf.assign(
                self.global_step_tensor, self.global_step_tensor + 1)

    def define_loss(self):
        """Build the ELBO: analytical KL + sigma2-weighted reconstruction + sigma regulariser."""
        with tf.name_scope("loss"):
            # KL divergence loss - analytical result for a diagonal Gaussian
            # posterior vs. standard normal prior:
            # 0.5 * sum(mu^2 + std^2 - log(std^2) - 1) per sample
            KL_loss = 0.5 * (tf.reduce_sum(tf.square(self.code_mean), 1)
                             + tf.reduce_sum(tf.square(self.code_std_dev), 1)
                             - tf.reduce_sum(tf.log(tf.square(self.code_std_dev)), 1)
                             - self.config['code_size'])
            self.KL_loss = tf.reduce_mean(KL_loss)
            # norm 1 of standard deviation of the sample-wise encoder prediction
            self.std_dev_norm = tf.reduce_mean(self.code_std_dev, axis=0)
            # squared reconstruction error, summed over window and channel axes
            weighted_reconstruction_error_dataset = tf.reduce_sum(
                tf.square(self.original_signal - self.decoded), [1, 2])
            weighted_reconstruction_error_dataset = tf.reduce_mean(weighted_reconstruction_error_dataset)
            self.weighted_reconstruction_error_dataset = weighted_reconstruction_error_dataset / (2 * self.sigma2)
            # least squared reconstruction error
            ls_reconstruction_error = tf.reduce_sum(
                tf.square(self.original_signal - self.decoded), [1, 2])
            self.ls_reconstruction_error = tf.reduce_mean(ls_reconstruction_error)
            # sigma regularisor - input elbo
            self.sigma_regularisor_dataset = self.input_dims / 2 * tf.log(self.sigma2)
            # NOTE(review): the Gaussian normalisation constant is usually
            # (D/2)*log(2*pi); this computes (D/2)*(2*pi). It is constant w.r.t.
            # trainable parameters (gradients unaffected) but offsets the
            # reported ELBO — confirm against the reference implementation.
            two_pi = self.input_dims / 2 * tf.constant(2 * np.pi)
            # NOTE(review): the weighted term was already divided by 2*sigma2
            # above, so the extra 0.5 here makes the effective weight
            # 1/(4*sigma2) — verify this is intended.
            self.elbo_loss = two_pi + self.sigma_regularisor_dataset + \
                0.5 * self.weighted_reconstruction_error_dataset + self.KL_loss

    def training_variables(self):
        """Collect trainable variables of encoder/decoder/sigma2 and report parameter counts."""
        encoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "encoder")
        decoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "decoder")
        sigma_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "sigma2_dataset")
        self.train_vars_VAE = encoder_vars + decoder_vars + sigma_vars
        num_encoder = count_trainable_variables('encoder')
        num_decoder = count_trainable_variables('decoder')
        num_sigma2 = count_trainable_variables('sigma2_dataset')
        self.num_vars_total = num_decoder + num_encoder + num_sigma2
        print("Total number of trainable parameters in the VAE network is: {}".format(self.num_vars_total))

    def compute_gradients(self):
        """Adam optimiser on the ELBO with per-element gradient clipping to [-1, 1]."""
        # learning rate is fed at run time so it can be scheduled externally
        self.lr = tf.placeholder(tf.float32, [])
        opt = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.9, beta2=0.95)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        gvs_dataset = opt.compute_gradients(self.elbo_loss, var_list=self.train_vars_VAE)
        print('gvs for dataset: {}'.format(gvs_dataset))
        capped_gvs = [(self.ClipIfNotNone(grad), var) for grad, var in gvs_dataset]
        # make sure any collected update ops run before the gradient step
        with tf.control_dependencies(update_ops):
            self.train_step_gradient = opt.apply_gradients(capped_gvs)
        print("Reach the definition of loss for VAE")

    def ClipIfNotNone(self, grad):
        # gradients of variables unused by the loss are None; pass them through
        if grad is None:
            return grad
        return tf.clip_by_value(grad, -1, 1)

    def init_saver(self):
        # keep only the most recent checkpoint of the VAE variables
        self.saver = tf.train.Saver(max_to_keep=1, var_list=self.train_vars_VAE)
class BaseTrain:
    """Base trainer: owns the TF session, initialises all variables, and keeps
    loss histories; a concrete subclass supplies train_epoch() (and populates
    most of the history lists and n_train_iter / n_val_iter)."""

    def __init__(self, sess, model, data, config):
        self.model = model
        self.config = config
        self.sess = sess
        self.data = data
        # initialise every global and local variable in this session
        self.init = tf.group(tf.global_variables_initializer(),
                             tf.local_variables_initializer())
        self.sess.run(self.init)
        # keep a record of the training result
        self.train_loss = []            # per-iteration total training loss
        self.val_loss = []              # validation loss history
        self.train_loss_ave_epoch = []  # per-epoch average training loss
        self.val_loss_ave_epoch = []    # per-epoch average validation loss
        self.recons_loss_train = []
        self.recons_loss_val = []
        self.KL_loss_train = []
        self.KL_loss_val = []
        self.sample_std_dev_train = []
        self.sample_std_dev_val = []
        self.iter_epochs_list = []      # iteration index at each epoch boundary
        self.test_sigma2 = []           # sigma2 values recorded over training

    def train(self):
        """Run num_epochs_vae epochs, printing elapsed and estimated remaining time."""
        self.start_time = time.time()
        for cur_epoch in range(0, self.config['num_epochs_vae'], 1):
            # one full pass over the training data (subclass responsibility)
            self.train_epoch()
            # compute current execution time
            self.current_time = time.time()
            elapsed_time = (self.current_time - self.start_time) / 60
            # linear extrapolation of remaining time from the average epoch duration
            est_remaining_time = (
                self.current_time - self.start_time) / (cur_epoch + 1) * (
                self.config['num_epochs_vae'] - cur_epoch - 1)
            est_remaining_time = est_remaining_time / 60
            print("Already trained for {} min; Remaining {} min.".format(elapsed_time, est_remaining_time))
            self.sess.run(self.model.increment_cur_epoch_tensor)

    def save_variables_VAE(self):
        # save some variables for later inspection
        # (file name encodes the main hyper-parameters of the run)
        file_name = "{}{}-batch-{}-epoch-{}-code-{}-lr-{}.npz".format(self.config['result_dir'],
                                                                      self.config['exp_name'],
                                                                      self.config['batch_size'],
                                                                      self.config['num_epochs_vae'],
                                                                      self.config['code_size'],
                                                                      self.config['learning_rate_vae'])
        np.savez(file_name,
                 iter_list_val=self.iter_epochs_list,
                 train_loss=self.train_loss,
                 val_loss=self.val_loss,
                 n_train_iter=self.n_train_iter,
                 n_val_iter=self.n_val_iter,
                 recons_loss_train=self.recons_loss_train,
                 recons_loss_val=self.recons_loss_val,
                 KL_loss_train=self.KL_loss_train,
                 KL_loss_val=self.KL_loss_val,
                 num_para_all=self.model.num_vars_total,
                 sigma2=self.test_sigma2)

    def plot_train_and_val_loss(self):
        """Write three diagnostic plots (total loss, val-loss breakdown, sigma2)
        to result_dir. Reuses matplotlib figure num=1 for all three."""
        # plot the training and validation loss over epochs
        plt.clf()
        figure(num=1, figsize=(8, 6))
        plot(self.train_loss, 'b-')
        plot(self.iter_epochs_list, self.val_loss_ave_epoch, 'r-')
        plt.legend(('training loss (total)', 'validation loss'))
        plt.title('training loss over iterations (val @ epochs)')
        plt.ylabel('total loss')
        plt.xlabel('iterations')
        plt.grid(True)
        savefig(self.config['result_dir'] + '/loss.png')
        # plot individual components of validation loss over epochs
        plt.clf()
        figure(num=1, figsize=(8, 6))
        plot(self.recons_loss_val, 'b-')
        plot(self.KL_loss_val, 'r-')
        plt.legend(('Reconstruction loss', 'KL loss'))
        plt.title('validation loss breakdown')
        plt.ylabel('loss')
        plt.xlabel('num of batch')
        plt.grid(True)
        savefig(self.config['result_dir'] + '/val-loss.png')
        # plot sigma2 over the course of training
        plt.clf()
        figure(num=1, figsize=(8, 6))
        plot(self.test_sigma2, 'b-')
        plt.title('sigma2 over training')
        plt.ylabel('sigma2')
        plt.xlabel('iter')
        plt.grid(True)
        savefig(self.config['result_dir'] + '/sigma2.png')
data_loader.py,负责加载数据集,并保存原生数据时序图
from base import BaseDataGenerator
import numpy as np
import matplotlib.pylab as plt
from matplotlib.pyplot import savefig
class DataGenerator(BaseDataGenerator):
    """Loads an NAB-style .npz time-series dataset and prepares the window sets.

    Produces:
      train_set_vae / val_set_vae / test_set_vae: rolling windows of length l_win
      train_set_lstm / val_set_lstm: sequences of l_seq non-overlapping windows
    Each set is a dict with key 'data' holding a channel-expanded numpy array.
    """

    def __init__(self, config):
        super(DataGenerator, self).__init__(config)
        # load data here: generate 3 state variables: train_set, val_set and test_set
        self.load_NAB_dataset(self.config['dataset'], self.config['y_scale'])

    def load_NAB_dataset(self, dataset, y_scale=6):
        """Load `<dataset>.npz`, plot the normalised series, and build all sets.

        :param dataset: dataset file name (without the .npz extension)
        :param y_scale: half-height of the y-axis in the overview plot
        """
        data_dir = '自己的数据集但要求是.npz压缩格式'
        data = np.load(data_dir + dataset + '.npz')
        # normalise the dataset by training set mean and std
        train_m = data['train_m']
        train_std = data['train_std']
        readings_normalised = (data['readings'] - train_m) / train_std
        # plot normalised data
        fig, axs = plt.subplots(1, 1, figsize=(18, 4), edgecolor='k')
        fig.subplots_adjust(hspace=.4, wspace=.4)
        axs.plot(data['t'], readings_normalised)
        # draw a vertical line at each train/test split position
        if data['idx_split'][0] == 0:
            axs.plot(data['idx_split'][1] * np.ones(20), np.linspace(-y_scale, y_scale, 20), 'b-')
        else:
            for i in range(2):
                axs.plot(data['idx_split'][i] * np.ones(20), np.linspace(-y_scale, y_scale, 20), 'b-')
            # BUGFIX: removed a corrupted stray line `axs.plot(*np.ones(20), ...)`
            # here — it unpacked 20 scalars as positional plot arguments (a
            # garbled duplicate of the loop body) and drew meaningless points.
        # mark the labelled anomalies
        for j in range(len(data['idx_anomaly'])):
            axs.plot(data['idx_anomaly'][j] * np.ones(20), np.linspace(-y_scale, 0.75 * y_scale, 20), 'r--')
        axs.grid(True)
        axs.set_xlim(0, len(data['t']))
        axs.set_ylim(-y_scale, y_scale)
        axs.set_xlabel("timestamp (every {})".format(data['t_unit']))
        axs.set_ylabel("readings")
        axs.set_title("{} dataset\n(normalised by train mean {:.4f} and std {:.4f})".format(dataset, train_m, train_std))
        axs.legend(('data', 'train test set split', 'anomalies'))
        savefig(self.config['result_dir'] + '/raw_data_normalised.pdf')
        # slice training set into rolling windows (stride 1)
        n_train_sample = len(data['training'])
        n_train_vae = n_train_sample - self.config['l_win'] + 1
        rolling_windows = np.zeros((n_train_vae, self.config['l_win']))
        for i in range(n_train_sample - self.config['l_win'] + 1):
            rolling_windows[i] = data['training'][i:i + self.config['l_win']]
        # create VAE training and validation set
        idx_train, idx_val, self.n_train_vae, self.n_val_vae = self.separate_train_and_val_set(n_train_vae)
        self.train_set_vae = dict(data=np.expand_dims(rolling_windows[idx_train], -1))
        self.val_set_vae = dict(data=np.expand_dims(rolling_windows[idx_val], -1))
        # test set: first batch_size validation windows
        self.test_set_vae = dict(data=np.expand_dims(rolling_windows[idx_val[:self.config['batch_size']]], -1))
        # create LSTM training and validation set: for every phase offset k,
        # slice the series into sequences of l_seq non-overlapping windows
        for k in range(self.config['l_win']):
            n_not_overlap_wins = (n_train_sample - k) // self.config['l_win']
            n_train_lstm = n_not_overlap_wins - self.config['l_seq'] + 1
            cur_lstm_seq = np.zeros((n_train_lstm, self.config['l_seq'], self.config['l_win']))
            for i in range(n_train_lstm):
                cur_seq = np.zeros((self.config['l_seq'], self.config['l_win']))
                for j in range(self.config['l_seq']):
                    cur_seq[j] = data['training'][k + self.config['l_win'] * (j + i): k + self.config['l_win'] * (j + i + 1)]
                cur_lstm_seq[i] = cur_seq
            if k == 0:
                lstm_seq = cur_lstm_seq
            else:
                lstm_seq = np.concatenate((lstm_seq, cur_lstm_seq), axis=0)
        n_train_lstm = lstm_seq.shape[0]
        idx_train, idx_val, self.n_train_lstm, self.n_val_lstm = self.separate_train_and_val_set(n_train_lstm)
        self.train_set_lstm = dict(data=np.expand_dims(lstm_seq[idx_train], -1))
        self.val_set_lstm = dict(data=np.expand_dims(lstm_seq[idx_val], -1))

    def plot_time_series(self, data, time, data_list):
        """Plot four channels of `data` against `time` (minutes, shown as hours)
        and save the figure to result_dir."""
        fig, axs = plt.subplots(1, 4, figsize=(18, 2.5), edgecolor='k')
        fig.subplots_adjust(hspace=.8, wspace=.4)
        axs = axs.ravel()
        for i in range(4):
            axs[i].plot(time / 60., data[:, i])
            axs[i].set_title(data_list[i])
            axs[i].set_xlabel('time (h)')
            axs[i].set_xlim((np.amin(time) / 60., np.amax(time) / 60.))
        savefig(self.config['result_dir'] + '/raw_training_set_normalised.pdf')
utils.py,数据集加载辅助
import json
import os
import argparse
import tensorflow as tf
from datetime import datetime
def get_config_from_json(json_file):
    """
    Get the config from a json file

    :param json_file: path to the json configuration file
    :return: config(dictionary)
    """
    # parse the configurations from the config json file provided
    with open(json_file, 'r') as fh:
        return json.load(fh)
def save_config(config):
    """Dump the config dictionary as json to a timestamped .txt file.

    :param config: configuration dictionary; must contain 'result_dir'
                   (assumed to end with a path separator — TODO confirm)
    """
    dateTimeObj = datetime.now()
    timestampStr = dateTimeObj.strftime("%d-%b-%Y-%H-%M")
    filename = config['result_dir'] + 'training_config_{}.txt'.format(timestampStr)
    # BUGFIX: use a context manager so the file handle is closed even if
    # serialisation or the write raises (was open/write/close).
    with open(filename, "w") as f:
        f.write(json.dumps(config))
def process_config(json_file):
    """Load the json config and derive the experiment output directories.

    Adds 'summary_dir', 'result_dir', 'checkpoint_dir' and
    'checkpoint_dir_lstm' to the returned config dictionary.
    """
    config = get_config_from_json(json_file)
    # base directory for all experiment artefacts
    if config['load_dir'] == "default":
        base_dir = "../experiments/local-results/{}/{}/batch-{}".format(
            config['exp_name'], config['dataset'], config['batch_size'])
    else:
        base_dir = config['load_dir']
    # experiment folder name encodes the key hyper-parameters of the run
    if config['TRAIN_sigma'] == 1:
        exp_folder = '{}-{}-{}-{}-{}-trainSigma'.format(config['exp_name'],
                                                        config['dataset'],
                                                        config['l_win'],
                                                        config['l_seq'],
                                                        config['code_size'])
    else:
        exp_folder = '{}-{}-{}-{}-{}-fixedSigma-{}'.format(config['exp_name'],
                                                           config['dataset'],
                                                           config['l_win'],
                                                           config['l_seq'],
                                                           config['code_size'],
                                                           config['sigma'])
    # derive every output directory from the same experiment root
    for key, leaf in (('summary_dir', "summary/"),
                      ('result_dir', "result/"),
                      ('checkpoint_dir', "checkpoint/"),
                      ('checkpoint_dir_lstm', "checkpoint/lstm/")):
        config[key] = os.path.join(base_dir, exp_folder, leaf)
    return config
def create_dirs(dirs):
    """
    dirs - a list of directories to create if these directories are not found
    :param dirs:
    :return exit_code: 0:success -1:failed
    """
    try:
        for path in dirs:
            # exist_ok avoids a race between the existence check and creation
            os.makedirs(path, exist_ok=True)
        return 0
    except Exception as err:
        print("Creating directories error: {0}".format(err))
        exit(-1)
def count_trainable_variables(scope_name):
    """Count, print and return the number of trainable parameters under a scope.

    :param scope_name: TF variable-scope name to filter trainable variables by
    :return: total parameter count (int)
    """
    total_parameters = 0
    for variable in tf.trainable_variables(scope_name):
        # multiply out the static shape of this variable
        n_params = 1
        for dim in variable.get_shape():
            n_params *= dim.value
        total_parameters += n_params
    print(
        'The total number of trainable parameters in the {} model is: {}'.format(scope_name, total_parameters))
    return total_parameters
def get_args():
    """Parse command-line arguments; -c/--config gives the config file path."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '-c', '--config',
        metavar='C',
        default='None',
        help='The Configuration file')
    return parser.parse_args()
model.py,VAE和LSTM模型搭建
from base import BaseModel
import os
import numpy as np
import matplotlib.pylab as plt
from matplotlib.pyplot import savefig
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
class VAEmodel(BaseModel):
    """Convolutional VAE over rolling windows of a (possibly multi-channel) series.

    Encoder/decoder architectures are hard-coded for window lengths
    l_win in {24, 48, 144}; other values leave conv_4 undefined.
    """

    def __init__(self, config):
        super(VAEmodel, self).__init__(config)
        # number of scalars in one input window (window length x channels)
        self.input_dims = self.config['l_win'] * self.config['n_channel']
        # graph construction order matters: iterator -> model -> loss ->
        # variable collection -> optimiser -> saver
        self.define_iterator()
        self.build_model()
        self.define_loss()
        self.training_variables()
        self.compute_gradients()
        self.init_saver()

    def define_iterator(self):
        """Define input placeholders and the shuffled, batched tf.data pipeline."""
        self.original_signal = tf.placeholder(tf.float32, [None, self.config['l_win'], self.config['n_channel']])
        # shuffle seed is fed at run time (the trainer feeds the epoch number)
        self.seed = tf.placeholder(tf.int64, shape=())
        self.dataset = tf.data.Dataset.from_tensor_slices(self.original_signal)
        self.dataset = self.dataset.shuffle(buffer_size=60000, seed=self.seed)
        self.dataset = self.dataset.repeat(8000)
        self.dataset = self.dataset.batch(self.config['batch_size'], drop_remainder=True)
        self.iterator = self.dataset.make_initializable_iterator()
        self.input_image = self.iterator.get_next()
        # when is_code_input is True the decoder consumes code_input directly,
        # bypassing the encoder's sampled code (used for LSTM predictions)
        self.code_input = tf.placeholder(tf.float32, [None, self.config['code_size']])
        self.is_code_input = tf.placeholder(tf.bool)
        self.sigma2_offset = tf.constant(self.config['sigma2_offset'])

    def build_model(self):
        """Build encoder, reparameterised sampling layer, decoder, and sigma2."""
        init = tf.contrib.layers.xavier_initializer()
        with tf.variable_scope('encoder'):
            # treat each window as a (l_win, n_channel, 1) "image"
            input_tensor = tf.expand_dims(self.original_signal, -1)
            if self.config['l_win'] == 24:
                # symmetric-pad 24 -> 32 so three stride-2 convs reduce it cleanly
                conv_1 = tf.layers.conv2d(inputs=tf.pad(input_tensor, [[0, 0], [4, 4], [0, 0], [0, 0]], "SYMMETRIC"),
                                          filters=self.config['num_hidden_units'] / 16,
                                          kernel_size=(3, self.config['n_channel']),
                                          strides=(2, 1),
                                          padding='same',
                                          activation=tf.nn.leaky_relu,
                                          kernel_initializer=init)
                print("conv_1: {}".format(conv_1))
                conv_2 = tf.layers.conv2d(inputs=conv_1,
                                          filters=self.config['num_hidden_units'] / 8,
                                          kernel_size=(3, self.config['n_channel']),
                                          strides=(2, 1),
                                          padding='same',
                                          activation=tf.nn.leaky_relu,
                                          kernel_initializer=init)
                print("conv_2: {}".format(conv_2))
                conv_3 = tf.layers.conv2d(inputs=conv_2,
                                          filters=self.config['num_hidden_units'] / 4,
                                          kernel_size=(3, self.config['n_channel']),
                                          strides=(2, 1),
                                          padding='same',
                                          activation=tf.nn.leaky_relu,
                                          kernel_initializer=init)
                print("conv_3: {}".format(conv_3))
                # 'valid' conv collapses the remaining temporal extent to 1
                conv_4 = tf.layers.conv2d(inputs=conv_3,
                                          filters=self.config['num_hidden_units'],
                                          kernel_size=(4, self.config['n_channel']),
                                          strides=1,
                                          padding='valid',
                                          activation=tf.nn.leaky_relu,
                                          kernel_initializer=init)
                print("conv_4: {}".format(conv_4))
            elif self.config['l_win'] == 48:
                conv_1 = tf.layers.conv2d(input_tensor,
                                          filters=self.config['num_hidden_units'] / 16,
                                          kernel_size=(3, self.config['n_channel']),
                                          strides=(2, 1),
                                          padding='same',
                                          activation=tf.nn.leaky_relu,
                                          kernel_initializer=init)
                print("conv_1: {}".format(conv_1))
                conv_2 = tf.layers.conv2d(inputs=conv_1,
                                          filters=self.config['num_hidden_units'] / 8,
                                          kernel_size=(3, self.config['n_channel']),
                                          strides=(2, 1),
                                          padding='same',
                                          activation=tf.nn.leaky_relu,
                                          kernel_initializer=init)
                print("conv_2: {}".format(conv_2))
                conv_3 = tf.layers.conv2d(inputs=conv_2,
                                          filters=self.config['num_hidden_units'] / 4,
                                          kernel_size=(3, self.config['n_channel']),
                                          strides=(2, 1),
                                          padding='same',
                                          activation=tf.nn.leaky_relu,
                                          kernel_initializer=init)
                print("conv_3: {}".format(conv_3))
                conv_4 = tf.layers.conv2d(inputs=conv_3,
                                          filters=self.config['num_hidden_units'],
                                          kernel_size=(6, self.config['n_channel']),
                                          strides=1,
                                          padding='valid',
                                          activation=tf.nn.leaky_relu,
                                          kernel_initializer=init)
                print("conv_4: {}".format(conv_4))
            elif self.config['l_win'] == 144:
                # larger strides (4,4,3) to bring 144 down before the final conv
                conv_1 = tf.layers.conv2d(inputs=input_tensor,
                                          filters=self.config['num_hidden_units'] / 16,
                                          kernel_size=(3, self.config['n_channel']),
                                          strides=(4, 1),
                                          padding='same',
                                          activation=tf.nn.leaky_relu,
                                          kernel_initializer=init)
                print("conv_1: {}".format(conv_1))
                conv_2 = tf.layers.conv2d(inputs=conv_1,
                                          filters=self.config['num_hidden_units'] / 8,
                                          kernel_size=(3, self.config['n_channel']),
                                          strides=(4, 1),
                                          padding='same',
                                          activation=tf.nn.leaky_relu,
                                          kernel_initializer=init)
                print("conv_2: {}".format(conv_2))
                conv_3 = tf.layers.conv2d(inputs=conv_2,
                                          filters=self.config['num_hidden_units'] / 4,
                                          kernel_size=(3, self.config['n_channel']),
                                          strides=(3, 1),
                                          padding='same',
                                          activation=tf.nn.leaky_relu,
                                          kernel_initializer=init)
                print("conv_3: {}".format(conv_3))
                conv_4 = tf.layers.conv2d(inputs=conv_3,
                                          filters=self.config['num_hidden_units'],
                                          kernel_size=(3, self.config['n_channel']),
                                          strides=1,
                                          padding='valid',
                                          activation=tf.nn.leaky_relu,
                                          kernel_initializer=init)
                print("conv_4: {}".format(conv_4))
            # project conv features to the code mean and std-dev heads
            encoded_signal = tf.layers.flatten(conv_4)
            encoded_signal = tf.layers.dense(encoded_signal,
                                             units=self.config['code_size'] * 4,
                                             activation=tf.nn.leaky_relu,
                                             kernel_initializer=init)
            self.code_mean = tf.layers.dense(encoded_signal,
                                             units=self.config['code_size'],
                                             activation=None,
                                             kernel_initializer=init,
                                             name='code_mean')
            self.code_std_dev = tf.layers.dense(encoded_signal,
                                                units=self.config['code_size'],
                                                activation=tf.nn.relu,
                                                kernel_initializer=init,
                                                name='code_std_dev')
            # floor the std-dev away from zero for numerical stability
            self.code_std_dev = self.code_std_dev + 1e-2
            # reparameterised sample from the diagonal-Gaussian posterior
            mvn = tfp.distributions.MultivariateNormalDiag(loc=self.code_mean, scale_diag=self.code_std_dev)
            self.code_sample = mvn.sample()
        print("finish encoder: \n{}".format(self.code_sample))
        print("\n")
        with tf.variable_scope('decoder'):
            # either the encoder's sample or an externally supplied code
            encoded = tf.cond(self.is_code_input, lambda: self.code_input, lambda: self.code_sample)
            decoded_1 = tf.layers.dense(encoded,
                                        units=self.config['num_hidden_units'],
                                        activation=tf.nn.leaky_relu,
                                        kernel_initializer=init)
            # reshape to a 1x1 "image" with num_hidden_units channels; the
            # branches below upsample via conv + depth_to_space stages
            decoded_1 = tf.reshape(decoded_1, [-1, 1, 1, self.config['num_hidden_units']])
            if self.config['l_win'] == 24:
                decoded_2 = tf.layers.conv2d(decoded_1,
                                             filters=self.config['num_hidden_units'],
                                             kernel_size=1,
                                             padding='same',
                                             activation=tf.nn.leaky_relu)
                decoded_2 = tf.reshape(decoded_2, [-1, 4, 1, self.config['num_hidden_units'] // 4])
                print("decoded_2 is: {}".format(decoded_2))
                decoded_3 = tf.layers.conv2d(decoded_2,
                                             filters=self.config['num_hidden_units'] // 4,
                                             kernel_size=(3, 1),
                                             strides=1,
                                             padding='same',
                                             activation=tf.nn.leaky_relu,
                                             kernel_initializer=init)
                decoded_3 = tf.nn.depth_to_space(input=decoded_3,
                                                 block_size=2)
                decoded_3 = tf.reshape(decoded_3, [-1, 8, 1, self.config['num_hidden_units'] // 8])
                print("decoded_3 is: {}".format(decoded_3))
                decoded_4 = tf.layers.conv2d(decoded_3,
                                             filters=self.config['num_hidden_units'] // 8,
                                             kernel_size=(3, 1),
                                             strides=1,
                                             padding='same',
                                             activation=tf.nn.leaky_relu,
                                             kernel_initializer=init)
                decoded_4 = tf.nn.depth_to_space(input=decoded_4,
                                                 block_size=2)
                decoded_4 = tf.reshape(decoded_4, [-1, 16, 1, self.config['num_hidden_units'] // 16])
                print("decoded_4 is: {}".format(decoded_4))
                decoded_5 = tf.layers.conv2d(decoded_4,
                                             filters=self.config['num_hidden_units'] // 16,
                                             kernel_size=(3, 1),
                                             strides=1,
                                             padding='same',
                                             activation=tf.nn.leaky_relu,
                                             kernel_initializer=init)
                decoded_5 = tf.nn.depth_to_space(input=decoded_5,
                                                 block_size=2)
                # NOTE(review): this reshape swaps the roles of the spatial and
                # channel axes relative to the other branches — confirm it
                # matches the reference implementation for l_win == 24.
                decoded_5 = tf.reshape(decoded_5, [-1, self.config['num_hidden_units'] // 16, 1, 16])
                print("decoded_5 is: {}".format(decoded_5))
                # 'valid' conv trims the padded 32 samples back down to l_win = 24
                decoded = tf.layers.conv2d(inputs=decoded_5,
                                           filters=self.config['n_channel'],
                                           kernel_size=(9, 1),
                                           strides=1,
                                           padding='valid',
                                           activation=None,
                                           kernel_initializer=init)
                print("decoded_6 is: {}".format(decoded))
                self.decoded = tf.reshape(decoded, [-1, self.config['l_win'], self.config['n_channel']])
            elif self.config['l_win'] == 48:
                decoded_2 = tf.layers.conv2d(decoded_1,
                                             filters=256 * 3,
                                             kernel_size=1,
                                             padding='same',
                                             activation=tf.nn.leaky_relu)
                decoded_2 = tf.reshape(decoded_2, [-1, 3, 1, 256])
                print("decoded_2 is: {}".format(decoded_2))
                decoded_3 = tf.layers.conv2d(decoded_2,
                                             filters=256,
                                             kernel_size=(3, 1),
                                             strides=1,
                                             padding='same',
                                             activation=tf.nn.leaky_relu,
                                             kernel_initializer=init)
                decoded_3 = tf.nn.depth_to_space(input=decoded_3,
                                                 block_size=2)
                decoded_3 = tf.reshape(decoded_3, [-1, 6, 1, 128])
                print("decoded_3 is: {}".format(decoded_3))
                decoded_4 = tf.layers.conv2d(decoded_3,
                                             filters=128,
                                             kernel_size=(3, 1),
                                             strides=1,
                                             padding='same',
                                             activation=tf.nn.leaky_relu,
                                             kernel_initializer=init)
                decoded_4 = tf.nn.depth_to_space(input=decoded_4,
                                                 block_size=2)
                decoded_4 = tf.reshape(decoded_4, [-1, 24, 1, 32])
                print("decoded_4 is: {}".format(decoded_4))
                decoded_5 = tf.layers.conv2d(decoded_4,
                                             filters=32,
                                             kernel_size=(3, 1),
                                             strides=1,
                                             padding='same',
                                             activation=tf.nn.leaky_relu,
                                             kernel_initializer=init)
                decoded_5 = tf.nn.depth_to_space(input=decoded_5,
                                                 block_size=2)
                decoded_5 = tf.reshape(decoded_5, [-1, 48, 1, 16])
                print("decoded_5 is: {}".format(decoded_5))
                decoded = tf.layers.conv2d(inputs=decoded_5,
                                           filters=1,
                                           kernel_size=(5, self.config['n_channel']),
                                           strides=1,
                                           padding='same',
                                           activation=None,
                                           kernel_initializer=init)
                print("decoded_6 is: {}".format(decoded))
                self.decoded = tf.reshape(decoded, [-1, self.config['l_win'], self.config['n_channel']])
            elif self.config['l_win'] == 144:
                decoded_2 = tf.layers.conv2d(decoded_1,
                                             filters=32 * 27,
                                             kernel_size=1,
                                             strides=1,
                                             padding='same',
                                             activation=tf.nn.leaky_relu)
                decoded_2 = tf.reshape(decoded_2, [-1, 3, 1, 32 * 9])
                print("decoded_2 is: {}".format(decoded_2))
                decoded_3 = tf.layers.conv2d(decoded_2,
                                             filters=32 * 9,
                                             kernel_size=(3, 1),
                                             strides=1,
                                             padding='same',
                                             activation=tf.nn.leaky_relu,
                                             kernel_initializer=init)
                decoded_3 = tf.nn.depth_to_space(input=decoded_3,
                                                 block_size=3)
                decoded_3 = tf.reshape(decoded_3, [-1, 9, 1, 32 * 3])
                print("decoded_3 is: {}".format(decoded_3))
                decoded_4 = tf.layers.conv2d(decoded_3,
                                             filters=32 * 3,
                                             kernel_size=(3, 1),
                                             strides=1,
                                             padding='same',
                                             activation=tf.nn.leaky_relu,
                                             kernel_initializer=init)
                decoded_4 = tf.nn.depth_to_space(input=decoded_4,
                                                 block_size=2)
                decoded_4 = tf.reshape(decoded_4, [-1, 36, 1, 24])
                print("decoded_4 is: {}".format(decoded_4))
                decoded_5 = tf.layers.conv2d(decoded_4,
                                             filters=24,
                                             kernel_size=(3, 1),
                                             strides=1,
                                             padding='same',
                                             activation=tf.nn.leaky_relu,
                                             kernel_initializer=init)
                decoded_5 = tf.nn.depth_to_space(input=decoded_5,
                                                 block_size=2)
                decoded_5 = tf.reshape(decoded_5, [-1, 144, 1, 6])
                print("decoded_5 is: {}".format(decoded_5))
                decoded = tf.layers.conv2d(inputs=decoded_5,
                                           filters=1,
                                           kernel_size=(9, self.config['n_channel']),
                                           strides=1,
                                           padding='same',
                                           activation=None,
                                           kernel_initializer=init)
                print("decoded_6 is: {}".format(decoded))
                self.decoded = tf.reshape(decoded, [-1, self.config['l_win'], self.config['n_channel']])
        print("finish decoder: \n{}".format(self.decoded))
        print('\n')
        # define sigma2 parameter to be trained to optimise ELBO
        with tf.variable_scope('sigma2_dataset'):
            if self.config['TRAIN_sigma'] == 1:
                sigma = tf.Variable(tf.cast(self.config['sigma'], tf.float32),
                                    dtype=tf.float32, trainable=True)
            else:
                sigma = tf.cast(self.config['sigma'], tf.float32)
            self.sigma2 = tf.square(sigma)
            # offset keeps sigma2 strictly positive while being trained
            if self.config['TRAIN_sigma'] == 1:
                self.sigma2 = self.sigma2 + self.sigma2_offset
        print("sigma2: \n{}\n".format(self.sigma2))
class lstmKerasModel:
def __init__(self, data):
pass
def create_lstm_model(self, config):
lstm_input = tf.keras.layers.Input(shape=(config['l_seq'] - 1, config['code_size']))
LSTM1 = tf.keras.layers.LSTM(config['num_hidden_units_lstm'], return_sequences=True)(lstm_input)
LSTM2 = tf.keras.layers.LSTM(config['num_hidden_units_lstm'], return_sequences=True)(LSTM1)
lstm_output = tf.keras.layers.LSTM(config['code_size'], return_sequences=True, activation=None)(LSTM2)
lstm_model = tf.keras.Model(lstm_input, lstm_output)
lstm_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=config['learning_rate_lstm']),
loss='mse',
metrics=['mse'])
return lstm_model
def produce_embeddings(self, config, model_vae, data, sess):
self.embedding_lstm_train = np.zeros((data.n_train_lstm, config['l_seq'], config['code_size']))
for i in range(data.n_train_lstm):
feed_dict = {
model_vae.original_signal: data.train_set_lstm['data'][i],
model_vae.is_code_input: False,
model_vae.code_input: np.zeros((1, config['code_size']))}
self.embedding_lstm_train[i] = sess.run(model_vae.code_mean, feed_dict=feed_dict)
print("Finish processing the embeddings of the entire dataset.")
print("The first a few embeddings are\n{}".format(self.embedding_lstm_train[0, 0:5]))
self.x_train = self.embedding_lstm_train[:, :config['l_seq'] - 1]
self.y_train = self.embedding_lstm_train[:, 1:]
self.embedding_lstm_test = np.zeros((data.n_val_lstm, config['l_seq'], config['code_size']))
for i in range(data.n_val_lstm):
feed_dict = {
model_vae.original_signal: data.val_set_lstm['data'][i],
model_vae.is_code_input: False,
model_vae.code_input: np.zeros((1, config['code_size']))}
self.embedding_lstm_test[i] = sess.run(model_vae.code_mean, feed_dict=feed_dict)
self.x_test = self.embedding_lstm_test[:, :config['l_seq'] - 1]
self.y_test = self.embedding_lstm_test[:, 1:]
def load_model(self, lstm_model, config, checkpoint_path):
print(config['checkpoint_dir_lstm'] + 'checkpoint')
if os.path.isfile(config['checkpoint_dir_lstm'] + 'checkpoint'):
lstm_model.load_weights(checkpoint_path)
print("LSTM model loaded.")
else:
print("No LSTM model loaded.")
def train(self, config, lstm_model, cp_callback):
lstm_model.fit(self.x_train, self.y_train,
validation_data=(self.x_test, self.y_test),
batch_size=config['batch_size_lstm'],
epochs=config['num_epochs_lstm'],
callbacks=[cp_callback])
def plot_reconstructed_lt_seq(self, idx_test, config, model_vae, sess, data, lstm_embedding_test):
feed_dict_vae = {
model_vae.original_signal: np.zeros((config['l_seq'], config['l_win'], config['n_channel'])),
model_vae.is_code_input: True,
model_vae.code_input: self.embedding_lstm_test[idx_test]}
decoded_seq_vae = np.squeeze(sess.run(model_vae.decoded, feed_dict=feed_dict_vae))
print("Decoded seq from VAE: {}".format(decoded_seq_vae.shape))
feed_dict_lstm = {
model_vae.original_signal: np.zeros((config['l_seq'] - 1, config['l_win'], config['n_channel'])),
model_vae.is_code_input: True,
model_vae.code_input: lstm_embedding_test[idx_test]}
decoded_seq_lstm = np.squeeze(sess.run(model_vae.decoded, feed_dict=feed_dict_lstm))
print("Decoded seq from lstm: {}".format(decoded_seq_lstm.shape))
fig, axs = plt.subplots(config['n_channel'], 2, figsize=(15, 4.5 * config['n_channel']), edgecolor='k')
fig.subplots_adjust(hspace=.4, wspace=.4)
axs = axs.ravel()
for j in range(config['n_channel']):
for i in range(2):
axs[i + j * 2].plot(np.arange(0, config['l_seq'] * config['l_win']),
np.reshape(data.val_set_lstm['data'][idx_test, :, :, j],
(config['l_seq'] * config['l_win'])))
axs[i + j * 2].grid(True)
axs[i + j * 2].set_xlim(0, config['l_seq'] * config['l_win'])
axs[i + j * 2].set_xlabel('samples')
if config['n_channel'] == 1:
axs[0 + j * 2].plot(np.arange(0, config['l_seq'] * config['l_win']),
np.reshape(decoded_seq_vae, (config['l_seq'] * config['l_win'])), 'r--')
axs[1 + j * 2].plot(np.arange(config['l_win'], config['l_seq'] * config['l_win']),
np.reshape(decoded_seq_lstm, ((config['l_seq'] - 1) * config['l_win'])), 'g--')
else:
axs[0 + j * 2].plot(np.arange(0, config['l_seq'] * config['l_win']),
np.reshape(decoded_seq_vae[:, :, j], (config['l_seq'] * config['l_win'])), 'r--')
axs[1 + j * 2].plot(np.arange(config['l_win'], config['l_seq'] * config['l_win']),
np.reshape(decoded_seq_lstm[:, :, j], ((config['l_seq'] - 1) * config['l_win'])), 'g--')
axs[0 + j * 2].set_title('VAE reconstruction - channel {}'.format(j))
axs[1 + j * 2].set_title('LSTM reconstruction - channel {}'.format(j))
for i in range(2):
axs[i + j * 2].legend(('ground truth', 'reconstruction'))
savefig(config['result_dir'] + "lstm_long_seq_recons_{}.pdf".format(idx_test))
fig.clf()
plt.close()
def plot_lstm_embedding_prediction(self, idx_test, config, model_vae, sess, data, lstm_embedding_test):
    """Compare, per latent dimension, the VAE embedding of a test sequence
    with the LSTM's one-step-ahead prediction of that embedding.

    First renders the long-sequence reconstruction figure (side effect of
    plot_reconstructed_lt_seq), then draws one panel per code dimension and
    saves the result as a PDF under config['result_dir'].
    """
    self.plot_reconstructed_lt_seq(idx_test, config, model_vae, sess, data, lstm_embedding_test)
    n_code = config['code_size']
    seq_len = config['l_seq']
    # window index 1..seq_len-1: the LSTM predicts each window from the
    # previous one, so the first window has no prediction to compare against
    window_axis = np.arange(1, seq_len)
    fig, panels = plt.subplots(2, n_code // 2, figsize=(15, 5.5), edgecolor='k')
    fig.subplots_adjust(hspace=.4, wspace=.4)
    panels = panels.ravel()
    for dim in range(n_code):
        ax = panels[dim]
        ax.plot(window_axis, np.squeeze(self.embedding_lstm_test[idx_test, 1:, dim]))
        ax.plot(window_axis, np.squeeze(lstm_embedding_test[idx_test, :, dim]))
        ax.set_xlim(1, seq_len - 1)
        ax.set_ylim(-2.5, 2.5)
        ax.grid(True)
        ax.set_title('Embedding dim {}'.format(dim))
        ax.set_xlabel('windows')
        if dim == n_code - 1:
            # legend only on the last panel to avoid clutter
            ax.legend(('VAE\nembedding', 'LSTM\nembedding'))
    savefig(config['result_dir'] + "lstm_seq_embedding_{}.pdf".format(idx_test))
    fig.clf()
    plt.close()
trainer.py:负责 VAE 模型的训练与验证,在每个 epoch 结束时保存训练好的模型参数,对实验结果进行可视化处理,并保存解码器输出结果。
from base import BaseTrain
import numpy as np
import matplotlib.pylab as plt
from matplotlib.pyplot import savefig
from scipy.stats import multivariate_normal
class vaeTrainer(BaseTrain):
    """Trainer for the VAE model.

    Drives the per-epoch train/validation/test loops inside a TF1 session,
    checkpoints the model at the end of every epoch, and writes diagnostic
    plots (reconstructions, prior samples, loss curves) to the result dir.

    NOTE(review): relies on attributes initialised by BaseTrain (e.g.
    self.train_loss, self.val_loss, self.test_sigma2, self.save_variables_VAE,
    self.plot_train_and_val_loss) — not visible in this file chunk.
    """
    def __init__(self, sess, model, data, config):
        super(vaeTrainer, self).__init__(sess, model, data, config)

    def train_epoch(self):
        """Run one full epoch.

        Order matters here: the dataset iterator is (re)initialised before
        each loop, test metrics are computed once on the last training
        iteration, and the checkpoint/plots are produced only after the
        validation loop completes.
        """
        self.cur_epoch = self.model.cur_epoch_tensor.eval(self.sess)
        # training: re-seed the input pipeline with the epoch number so the
        # shuffle order changes every epoch
        self.sess.run(self.model.iterator.initializer,
                      feed_dict={
                          self.model.original_signal: self.data.train_set_vae['data'],
                          self.model.seed: self.cur_epoch})
        self.n_train_iter = self.data.n_train_vae // self.config['batch_size']
        # run the (expensive) test-set evaluation only once, on the last batch
        idx_check_point = (self.n_train_iter - 1)
        train_loss_cur_epoch = 0.0
        for i in range(self.n_train_iter):
            loss = self.train_step()
            self.sess.run(self.model.increment_global_step_tensor)
            self.train_loss.append(np.squeeze(loss))
            train_loss_cur_epoch = train_loss_cur_epoch + loss
            if i == idx_check_point:
                test_loss, test_recons_loss_weighted, test_kl, test_sigma_regularisor, test_code_std_norm, test_cur_sigma2, test_recons_loss_ls = self.test_step()
        self.train_loss_ave_epoch.append(train_loss_cur_epoch / self.n_train_iter)
        # validation
        self.iter_epochs_list.append(self.n_train_iter * (self.cur_epoch + 1))
        self.sess.run(self.model.iterator.initializer,
                      feed_dict={
                          self.model.original_signal: self.data.val_set_vae['data'],
                          self.model.seed: self.cur_epoch})
        self.n_val_iter = self.data.n_val_vae // self.config['batch_size']
        val_loss_cur_epoch = 0.0
        for i in range(self.n_val_iter):
            val_loss = self.val_step()
            val_loss_cur_epoch = val_loss_cur_epoch + val_loss
        self.val_loss_ave_epoch.append(val_loss_cur_epoch / self.n_val_iter)
        # save the model parameters at the end of this epoch
        self.model.save(self.sess)
        print(
            "{}/{}, test loss: -elbo: {:.4f}, recons_loss_weighted: {:.4f}, recons_loss_ls: {:.4f}, KL_loss: {:.4f}, sigma_regularisor: {:.4f}, code_std_dev: {}".format(
                self.cur_epoch,
                self.config['num_epochs_vae'] - 1,
                test_loss,
                test_recons_loss_weighted,
                np.squeeze(np.mean(test_recons_loss_ls)),
                test_kl,
                test_sigma_regularisor,
                np.squeeze(test_code_std_norm)))
        print("Loss on training and val sets:\ntrain: {:.4f}, val: {:.4f}".format(
            self.train_loss_ave_epoch[self.cur_epoch],
            self.val_loss_ave_epoch[self.cur_epoch]))
        print("Current sigma2: {:.7f}".format(test_cur_sigma2))
        # save the current variables
        self.save_variables_VAE()
        # reconstruction plot (uses self.output_test populated by test_step above)
        self.plot_reconstructed_signal()
        # generate samples from prior
        self.generate_samples_from_prior()
        # plot the training and validation loss over iterations/epochs
        self.plot_train_and_val_loss()

    def train_step(self):
        """Apply one optimizer update on the next training batch.

        Returns the scalar ELBO loss of the batch.
        """
        batch_image = self.sess.run(self.model.input_image)
        feed_dict = {
            self.model.original_signal: batch_image,
            self.model.is_code_input: False,
            # dummy code input; ignored by the graph when is_code_input is False
            self.model.code_input: np.zeros((1, self.config['code_size'])),
            # exponential learning-rate decay: lr * 0.98^epoch
            self.model.lr: self.config['learning_rate_vae'] * (0.98 ** self.cur_epoch)}
        train_loss, _ = self.sess.run([self.model.elbo_loss, self.model.train_step_gradient],
                                      feed_dict=feed_dict)
        return train_loss

    def val_step(self):
        """Evaluate one validation batch (no gradient update).

        Appends the ELBO, reconstruction and KL losses to their running lists
        and returns the ELBO loss.
        """
        input_image_val = self.sess.run(self.model.input_image)
        # NOTE(review): std_dev_loss_val is fetched but never stored/used
        val_cost, recon_loss_val, kl_loss_val, std_dev_loss_val = self.sess.run([self.model.elbo_loss,
                                                                                 self.model.ls_reconstruction_error,
                                                                                 self.model.KL_loss,
                                                                                 self.model.std_dev_norm],
                                                                                feed_dict={
                                                                                    self.model.original_signal: input_image_val,
                                                                                    self.model.is_code_input: False,
                                                                                    self.model.code_input: np.zeros(
                                                                                        (1, self.config['code_size']))})
        self.val_loss.append(np.squeeze(val_cost))
        self.recons_loss_val.append(np.squeeze(np.mean(recon_loss_val)))
        self.KL_loss_val.append(kl_loss_val)
        return val_cost

    def test_step(self):
        """Evaluate the full test set in a single sess.run.

        Side effects: stores the decoded reconstructions in self.output_test
        (consumed later by plot_reconstructed_signal) and appends the current
        sigma2 estimate to self.test_sigma2. Returns the test metrics.
        """
        feed_dict = {
            self.model.original_signal: self.data.test_set_vae['data'],
            self.model.is_code_input: False,
            self.model.code_input: np.zeros((1, self.config['code_size']))}
        self.output_test, test_loss, test_recons_loss_weighted, test_kl, test_sigma_regularisor, test_code_std_norm, test_cur_sigma2, test_recons_loss_ls = self.sess.run(
            [self.model.decoded,
             self.model.elbo_loss,
             self.model.weighted_reconstruction_error_dataset,
             self.model.KL_loss,
             self.model.sigma_regularisor_dataset,
             self.model.std_dev_norm,
             self.model.sigma2,
             self.model.ls_reconstruction_error],
            feed_dict=feed_dict)
        self.test_sigma2.append(np.squeeze(test_cur_sigma2))
        return test_loss, test_recons_loss_weighted, test_kl, test_sigma_regularisor, test_code_std_norm, np.squeeze(
            test_cur_sigma2), test_recons_loss_ls

    def plot_reconstructed_signal(self):
        """Plot 20 test windows against their VAE reconstructions, one figure
        per channel, and save each as a PDF in the result dir.

        Reads self.output_test, so test_step must have run first.
        """
        input_images = np.squeeze(self.data.test_set_vae['data'])
        decoded_images = np.squeeze(self.output_test)
        n_images = 20
        # plot the reconstructed image for a shape
        for j in range(self.config['n_channel']):
            fig, axs = plt.subplots(4, 5, figsize=(18, 10), edgecolor='k')
            fig.subplots_adjust(hspace=.4, wspace=.4)
            axs = axs.ravel()
            for i in range(n_images):
                # squeeze drops the channel axis when n_channel == 1,
                # hence the two indexing forms below
                if self.config['n_channel'] == 1:
                    axs[i].plot(input_images[i])
                    axs[i].plot(decoded_images[i])
                else:
                    axs[i].plot(input_images[i, :, j])
                    axs[i].plot(decoded_images[i, :, j])
                axs[i].grid(True)
                axs[i].set_xlim(0, self.config['l_win'])
                axs[i].set_ylim(-5, 5)
                if i == 19:
                    # legend only on the last subplot
                    axs[i].legend(('original', 'reconstructed'))
            plt.suptitle('Channel {}'.format(j))
            savefig(self.config['result_dir'] + 'test_reconstructed_{}_{}.pdf'.format(self.cur_epoch, j))
            fig.clf()
            plt.close()

    def generate_samples_from_prior(self):
        """Decode 20 latent codes sampled from the standard-normal prior and
        save the generated signals, one figure per channel.
        """
        # standard multivariate normal prior N(0, I) over the code space
        rv = multivariate_normal(np.zeros(self.config['code_size']), np.diag(np.ones(self.config['code_size'])))
        # Generate a batch size of samples from the prior samples
        n_images = 20
        samples_code_prior = rv.rvs(n_images)
        sampled_images = self.sess.run(self.model.decoded,
                                       feed_dict={
                                           # zero signal is a placeholder; with is_code_input True
                                           # the decoder consumes code_input instead of the encoder path
                                           self.model.original_signal: np.zeros(
                                               (n_images, self.config['l_win'], self.config['n_channel'])),
                                           self.model.is_code_input: True,
                                           self.model.code_input: samples_code_prior})
        sampled_images = np.squeeze(sampled_images)
        for j in range(self.config['n_channel']):
            fig, axs = plt.subplots(4, 5, figsize=(18, 10), edgecolor='k')
            fig.subplots_adjust(hspace=.4, wspace=.4)
            axs = axs.ravel()
            for i in range(n_images):
                if self.config['n_channel'] == 1:
                    axs[i].plot(sampled_images[i])
                else:
                    axs[i].plot(sampled_images[i, :, j])
                axs[i].grid(True)
                axs[i].set_xlim(0, self.config['l_win'])
                axs[i].set_ylim(-5, 5)
            plt.suptitle('Channel {}'.format(j))
            savefig(self.config['result_dir'] + 'generated_samples_{}_{}.pdf'.format(self.cur_epoch, j))
            fig.clf()
            plt.close()
希望对大家有所帮助,共同进步,互相学习!