[b,seq_len,feature_len]
[b,28,28]
From a sequence point of view, each image is treated as a sequence of 28 time steps with 28 features per step; there are b images in a batch.
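A minimal sketch of this view (random tensors used only to show shapes): each row of the image is fed as one time step.
import tensorflow as tf
from tensorflow.keras import layers

x=tf.random.normal([4,28,28])  #[b, seq_len=28, feature_len=28]
rnn=layers.SimpleRNN(64)       #reads the 28 rows as 28 time steps
h=rnn(x)
print(h.shape)                 #(4, 64): the last hidden state per image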
Each word corresponds to three waveforms; at each time step the waveforms give one word vector, and each time step represents the word currently being input.
What a word embedding needs to provide:
net=layers.Embedding(10,4) #10 = vocabulary size, 4 = embedding dim; each word maps to a [1,4] vector
net(x) #x is a tensor of word indices, e.g. shape [b,seq_len]; output shape [b,seq_len,4]
It extracts semantic information.
But if a sentence is long and contains many words, the parameter count becomes large; the embedding by itself carries no semantic relations between words and does nothing about word order. Each vector only represents the meaning of the current word, not the features and information of the whole sentence.
Idea: add a memory to the neural network.
Store the previous time step's output in the memory and feed it to the hidden layer at the next time step, so that analyzing the current word's meaning takes the previous word's information into account. The output at the last time step can be used for classification, because it has accumulated the text information of the whole sentence.
Use a binary classifier on the sentence's sentiment: pos/neg.
There are only two weight matrices, shared across all time steps (one for the input, one for the recurrent state).
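Written out as code, one recurrent step uses only those two shared matrices; a sketch with assumed names and shapes:
import tensorflow as tf

b,feature_len,units=4,100,64
W_xh=tf.random.normal([feature_len,units]) #input-to-hidden
W_hh=tf.random.normal([units,units])       #hidden-to-hidden, reused at every step
x_t=tf.random.normal([b,feature_len])
h_prev=tf.zeros([b,units])
h_t=tf.tanh(x_t@W_xh+h_prev@W_hh)          #[b, units]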
During gradient descent, backpropagation through time repeatedly multiplies by the recurrent weight W_hh, producing high-order powers of W_hh in the gradient.
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import datasets,layers,optimizers,Sequential,metrics
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
tf.random.set_seed(22)
#np.random.seed(22)
#assert tf.__version__.startswith("2.")
#vocabulary size: keep only the most common words
total_words=10000
max_review_len=80
embedding_len=100
(x_train,y_train),(x_test,y_test)=keras.datasets.imdb.load_data(num_words=total_words)
#pad all sentences to the same length
#x_train: [b,80]  (number of sentences, words per sentence)
#x_test:  [b,80]
x_train=keras.preprocessing.sequence.pad_sequences(x_train,maxlen=max_review_len)
x_test=keras.preprocessing.sequence.pad_sequences(x_test,maxlen=max_review_len)
batchsize=128
train_db=tf.data.Dataset.from_tensor_slices((x_train,y_train))
train_db=train_db.shuffle(1000).batch(batchsize,drop_remainder=True)
test_db=tf.data.Dataset.from_tensor_slices((x_test,y_test))
test_db=test_db.batch(batchsize,drop_remainder=True)
print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
# x_train shape: (25000, 80), x_test shape: (25000, 80)
# y=1 :pos y=0: neg
class MyRNN(keras.Model):
    def __init__(self,units):
        super(MyRNN,self).__init__()
        #initial hidden state for each cell: [b,64]
        self.state0=[tf.zeros([batchsize,units])]
        self.state1=[tf.zeros([batchsize,units])]
        #transform text to embedding representation
        #[b,80]=>[b,80,100]
        self.embedding=layers.Embedding(total_words,embedding_len,
                                        input_length=max_review_len)
        #two stacked SimpleRNN cells, h_dim=64
        self.rnn_cell0=layers.SimpleRNNCell(units,dropout=0.2)
        self.rnn_cell1=layers.SimpleRNNCell(units,dropout=0.2)
        #fc: [b,80,100]=>[b,64]=>[b,1]
        self.outlayer=layers.Dense(1)
    def call(self,inputs,training=None):
        '''
        net(x) or net(x,training=True): train mode
        net(x,training=False): test mode
        :param inputs: [b,80]
        :param training:
        :return:
        '''
        #[b,80]
        x=inputs
        #embedding: [b,80]=>[b,80,100]
        x=self.embedding(x)
        #rnn cell compute: [b,80,100]=>[b,64]
        state0=self.state0
        state1=self.state1
        for word in tf.unstack(x,axis=1): #word: [b,100]
            #h1=tanh(x@w_xh+h0@w_hh)
            out0,state0=self.rnn_cell0(word,state0,training)
            out1,state1=self.rnn_cell1(out0,state1,training)
        #out: [b,64]=>[b,1], use only the last time step's output
        x=self.outlayer(out1)
        #p(y is pos|x)
        prob=tf.sigmoid(x)
        return prob
def main():
    units=64
    epochs=4
    model=MyRNN(units)
    model.compile(optimizer=keras.optimizers.Adam(0.001),
                  loss=tf.losses.BinaryCrossentropy(),
                  metrics=["accuracy"],
                  experimental_run_tf_function=False)
    model.fit(train_db,epochs=epochs,validation_data=test_db)
    model.evaluate(test_db)
if __name__=="__main__":
    main()
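For comparison, a sketch of the same two-layer network using the higher-level layers.SimpleRNN wrapper, which runs the time loop internally (hyperparameters taken from the script above):
from tensorflow.keras import layers, Sequential

model=Sequential([
    layers.Embedding(total_words,embedding_len,input_length=max_review_len),
    layers.SimpleRNN(64,dropout=0.2,return_sequences=True), #pass every step to the next layer
    layers.SimpleRNN(64,dropout=0.2),                       #keep only the last step: [b,64]
    layers.Dense(1,activation="sigmoid"),
])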
The partial derivative of the output at time t with respect to the output stored at the first time step contains the recurrent weight raised to roughly the t-th power.
Gradient clipping: check the gradient at each step; if its norm exceeds a threshold, divide the gradient by its norm and multiply by the threshold. This shrinks its magnitude while preserving the update direction.
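tf.clip_by_norm implements exactly this rule; a tiny sketch with made-up gradients:
import tensorflow as tf

grads=[tf.random.normal([4,4])*100.]          #stand-in for exploding gradients
clipped=[tf.clip_by_norm(g,15.) for g in grads] #norm capped at the threshold 15
print(float(tf.norm(grads[0])),float(tf.norm(clipped[0])))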
The deeper the network (the longer the unrolled sequence), the closer the gradients of the earliest layers get to 0: the later layers keep updating effectively while the earlier layers stop updating.
The LSTM cell has 4 inputs (the input plus the input-gate, forget-gate and output-gate signals) and 1 output.
When taking the gradient of the memory, the computation unrolls through the three gates and the contributions are summed, so the gradient does not blow up or shrink to nothing.
In the LSTM there is a direct pathway that carries the previous memory straight into the next time step; in a plain RNN the previous memory must be multiplied by a weight before it is passed on.
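A quick interface check before the script below (shapes assumed for illustration): one LSTMCell step takes both state tensors and returns the output plus the new states.
import tensorflow as tf
from tensorflow.keras import layers

cell=layers.LSTMCell(64)
x_t=tf.random.normal([128,100])                  #one time step: [b, feature]
states=[tf.zeros([128,64]),tf.zeros([128,64])]   #two state tensors: h and c
out,states=cell(x_t,states)
print(out.shape,states[0].shape,states[1].shape) #all (128, 64)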
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import datasets,layers,optimizers,Sequential,metrics
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
tf.random.set_seed(22)
#np.random.seed(22)
#assert tf.__version__.startswith("2.")
#vocabulary size: keep only the most common words
total_words=10000
max_review_len=80
embedding_len=100
(x_train,y_train),(x_test,y_test)=keras.datasets.imdb.load_data(num_words=total_words)
#pad all sentences to the same length
#x_train: [b,80]  (number of sentences, words per sentence)
#x_test:  [b,80]
x_train=keras.preprocessing.sequence.pad_sequences(x_train,maxlen=max_review_len)
x_test=keras.preprocessing.sequence.pad_sequences(x_test,maxlen=max_review_len)
batchsize=128
train_db=tf.data.Dataset.from_tensor_slices((x_train,y_train))
train_db=train_db.shuffle(1000).batch(batchsize,drop_remainder=True)
test_db=tf.data.Dataset.from_tensor_slices((x_test,y_test))
test_db=test_db.batch(batchsize,drop_remainder=True)
print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
# x_train shape: (25000, 80), x_test shape: (25000, 80)
# y=1 :pos y=0: neg
class MyRNN(keras.Model):
    def __init__(self,units):
        super(MyRNN,self).__init__()
        #LSTM needs two state tensors per cell, h and c, each [b,64]
        self.state0=[tf.zeros([batchsize,units]),tf.zeros([batchsize,units])]
        self.state1=[tf.zeros([batchsize,units]),tf.zeros([batchsize,units])]
        #transform text to embedding representation
        #[b,80]=>[b,80,100]
        self.embedding=layers.Embedding(total_words,embedding_len,
                                        input_length=max_review_len)
        #two stacked cells, h_dim=64
        # self.rnn_cell0=layers.SimpleRNNCell(units,dropout=0.2)
        # self.rnn_cell1=layers.SimpleRNNCell(units,dropout=0.2)
        self.rnn_cell0=layers.LSTMCell(units,dropout=0.5)
        self.rnn_cell1=layers.LSTMCell(units,dropout=0.5)
        #fc: [b,80,100]=>[b,64]=>[b,1]
        self.outlayer=layers.Dense(1)
    def call(self,inputs,training=None):
        '''
        net(x) or net(x,training=True): train mode
        net(x,training=False): test mode
        :param inputs: [b,80]
        :param training:
        :return:
        '''
        #[b,80]
        x=inputs
        #embedding: [b,80]=>[b,80,100]
        x=self.embedding(x)
        #rnn cell compute: [b,80,100]=>[b,64]
        state0=self.state0
        state1=self.state1
        for word in tf.unstack(x,axis=1): #word: [b,100]
            out0,state0=self.rnn_cell0(word,state0,training)
            out1,state1=self.rnn_cell1(out0,state1,training)
        #out: [b,64]=>[b,1], use only the last time step's output
        x=self.outlayer(out1)
        #p(y is pos|x)
        prob=tf.sigmoid(x)
        return prob
def main():
    units=64
    epochs=4
    model=MyRNN(units)
    model.compile(optimizer=keras.optimizers.Adam(0.001),
                  loss=tf.losses.BinaryCrossentropy(),
                  metrics=["accuracy"],
                  experimental_run_tf_function=False)
    model.fit(train_db,epochs=epochs,validation_data=test_db)
    model.evaluate(test_db)
if __name__=="__main__":
    main()
Compared with the plain RNN version: SimpleRNNCell becomes LSTMCell,
and each layer's initial state must include both c and h.
The GRU simplifies the three gates down to two.
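Because the GRU merges the memory into its single hidden state, GRUCell takes one state tensor again, as this sketch (shapes assumed) shows; accordingly, the script below drops the extra c from state0/state1.
import tensorflow as tf
from tensorflow.keras import layers

cell=layers.GRUCell(64)
x_t=tf.random.normal([128,100])
state=[tf.zeros([128,64])]     #a single h state, no separate memory c
out,state=cell(x_t,state)
print(out.shape,state[0].shape) #(128, 64) and (128, 64)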
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import datasets,layers,optimizers,Sequential,metrics
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
tf.random.set_seed(22)
#np.random.seed(22)
#assert tf.__version__.startswith("2.")
#vocabulary size: keep only the most common words
total_words=10000
max_review_len=80
embedding_len=100
(x_train,y_train),(x_test,y_test)=keras.datasets.imdb.load_data(num_words=total_words)
#pad all sentences to the same length
#x_train: [b,80]  (number of sentences, words per sentence)
#x_test:  [b,80]
x_train=keras.preprocessing.sequence.pad_sequences(x_train,maxlen=max_review_len)
x_test=keras.preprocessing.sequence.pad_sequences(x_test,maxlen=max_review_len)
batchsize=128
train_db=tf.data.Dataset.from_tensor_slices((x_train,y_train))
train_db=train_db.shuffle(1000).batch(batchsize,drop_remainder=True)
test_db=tf.data.Dataset.from_tensor_slices((x_test,y_test))
test_db=test_db.batch(batchsize,drop_remainder=True)
print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
# x_train shape: (25000, 80), x_test shape: (25000, 80)
# y=1 :pos y=0: neg
class MyRNN(keras.Model):
    def __init__(self,units):
        super(MyRNN,self).__init__()
        #GRU keeps a single state h per cell: [b,64]
        self.state0=[tf.zeros([batchsize,units])]
        self.state1=[tf.zeros([batchsize,units])]
        #transform text to embedding representation
        #[b,80]=>[b,80,100]
        self.embedding=layers.Embedding(total_words,embedding_len,
                                        input_length=max_review_len)
        #two stacked cells, h_dim=64
        # self.rnn_cell0=layers.SimpleRNNCell(units,dropout=0.2)
        # self.rnn_cell1=layers.SimpleRNNCell(units,dropout=0.2)
        self.rnn_cell0=layers.GRUCell(units,dropout=0.5)
        self.rnn_cell1=layers.GRUCell(units,dropout=0.5)
        #fc: [b,80,100]=>[b,64]=>[b,1]
        self.outlayer=layers.Dense(1)
    def call(self,inputs,training=None):
        '''
        net(x) or net(x,training=True): train mode
        net(x,training=False): test mode
        :param inputs: [b,80]
        :param training:
        :return:
        '''
        #[b,80]
        x=inputs
        #embedding: [b,80]=>[b,80,100]
        x=self.embedding(x)
        #rnn cell compute: [b,80,100]=>[b,64]
        state0=self.state0
        state1=self.state1
        for word in tf.unstack(x,axis=1): #word: [b,100]
            out0,state0=self.rnn_cell0(word,state0,training)
            out1,state1=self.rnn_cell1(out0,state1,training)
        #out: [b,64]=>[b,1], use only the last time step's output
        x=self.outlayer(out1)
        #p(y is pos|x)
        prob=tf.sigmoid(x)
        return prob
def main():
    units=64
    epochs=4
    model=MyRNN(units)
    model.compile(optimizer=keras.optimizers.Adam(0.001),
                  loss=tf.losses.BinaryCrossentropy(),
                  metrics=["accuracy"],
                  experimental_run_tf_function=False)
    model.fit(train_db,epochs=epochs,validation_data=test_db)
    model.evaluate(test_db)
if __name__=="__main__":
    main()
Why is this unsupervised learning? The reconstruction target is the input itself, so no labels are needed.
Denoising autoencoder: add noise drawn from some distribution to the original image, and train the network to reconstruct the original.
Dropout autoencoder: during training some weights are set to 0, i.e. disconnected, so the input a layer receives does not come from all of the previous layer's outputs; at test time all the dropped connections are restored.
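A minimal sketch of that train/test asymmetry using layers.Dropout (the tensors are made up for illustration):
import tensorflow as tf
from tensorflow.keras import layers

drop=layers.Dropout(0.5)
x=tf.ones([1,8])
print(drop(x,training=True).numpy())  #about half the entries zeroed, the survivors scaled by 1/0.5
print(drop(x,training=False).numpy()) #identity at test time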
We want the latent layer to get closer to a given distribution.
The closer the distribution of z is to the generative model's probability distribution, i.e. the lower the KL divergence, the better the model performs.
For the variational autoencoder, the latent layer here is a distribution, and sampling from it is not differentiable (hence the reparameterization trick in the code below).
Using the KL divergence as a regularizer keeps the model from learning distributions with very small variance and makes the distribution over the latent space smoother.
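For reference, the closed-form KL term this regularizer computes, in the log-variance parameterization used by the VAE training loop further below (example values are made up):
import tensorflow as tf

mu=tf.constant([0.0,1.0])
log_var=tf.constant([0.0,-1.0])
#KL( N(mu,sigma^2) || N(0,1) ) per latent dimension
kl=-0.5*(1.+log_var-mu**2-tf.exp(log_var))
print(kl.numpy()) #0 exactly when mu=0 and log_var=0, i.e. already N(0,1)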
Sampling along different dimensions of the latent distribution and passing the samples through the decoder generates images with different attributes.
Each dimension of the latent distribution encodes some attribute of the image; for digits, this might be the stroke angle and the digit class.
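A sketch of that idea (hypothetical usage, assuming a VAE trained as in the script further below, with z_dim=10): sweep one latent dimension while holding the others at 0, then decode.
import tensorflow as tf

n,z_dim=11,10
sweep=tf.linspace(-3.0,3.0,n)
#put the sweep values into column 0 of an all-zero latent batch
z=tf.tensor_scatter_nd_update(tf.zeros([n,z_dim]),[[i,0] for i in range(n)],sweep)
#imgs=tf.sigmoid(model.decoder(z)) #[11,784]: one image per value of dimension 0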
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import datasets,layers,optimizers,Sequential,metrics
from PIL import Image
from matplotlib import pyplot as plt
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
tf.random.set_seed(22)
def save_images(imgs,name): #tile multiple 28x28 images into one 280x280 image
    new_im=Image.new("L",(280,280))
    index=0
    for i in range(0,280,28):
        for j in range(0,280,28):
            im=imgs[index]
            im=Image.fromarray(im,mode="L")
            new_im.paste(im,(i,j))
            index+=1
    new_im.save(name)
h_dim=20 #784=>20
batch_size=512
lr=1e-3
(x_train,y_train),(x_test,y_test)=datasets.fashion_mnist.load_data()
x_train,x_test=x_train.astype(np.float32)/255.,x_test.astype(np.float32)/255.
print(x_train.shape,y_train.shape)
#build the training-set pipeline
db_train=tf.data.Dataset.from_tensor_slices((x_train))
batchsize=512
db_train=db_train.shuffle(10000).batch(batchsize)
#build the test-set pipeline
db_test=tf.data.Dataset.from_tensor_slices((x_test))
db_test=db_test.batch(batchsize)
print(x_train.shape,y_train.shape)
class AE(keras.Model):
    def __init__(self):
        super(AE,self).__init__()
        #encoder: three layers
        self.encoder=Sequential([
            layers.Dense(256,activation=tf.nn.relu),
            layers.Dense(128,activation=tf.nn.relu),
            layers.Dense(h_dim)
        ])
        #decoder
        self.decoder=Sequential([
            layers.Dense(128,activation=tf.nn.relu),
            layers.Dense(256,activation=tf.nn.relu),
            layers.Dense(784)
        ])
    def call(self,inputs,training=None):
        #[b,784]=>[b,20]
        h=self.encoder(inputs)
        #[b,20]=>[b,784]
        x_hat=self.decoder(h)
        return x_hat
model=AE()
model.build(input_shape=(None,784))
model.summary()
optimizer=tf.optimizers.Adam(learning_rate=lr)
for epoch in range(100):
    for step,x in enumerate(db_train):
        #[b,28,28]=>[b,784]
        x=tf.reshape(x,[-1,784])
        with tf.GradientTape() as tape:
            x_rec_logits=model(x)
            rec_loss=tf.losses.binary_crossentropy(x,x_rec_logits,from_logits=True)
            rec_loss=tf.reduce_mean(rec_loss)
        grads=tape.gradient(rec_loss,model.trainable_variables)
        optimizer.apply_gradients(zip(grads,model.trainable_variables))
        if step%100==0:
            print(epoch,step,float(rec_loss))
    #evaluation: reconstruct one test batch and save inputs and reconstructions side by side
    x=next(iter(db_test))
    logits=model(tf.reshape(x,[-1,784]))
    x_hat=tf.sigmoid(logits)
    #[b,784]=>[b,28,28]
    x_hat=tf.reshape(x_hat,[-1,28,28])
    #[b,28,28]=>[2b,28,28]
    x_concat=tf.concat([x,x_hat],axis=0)
    x_concat=x_concat.numpy()*255.
    x_concat=x_concat.astype(np.uint8)
    save_images(x_concat,"autoencoder-images/rec_epoch_%d.png"%epoch)
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import datasets,layers,optimizers,Sequential,metrics
from PIL import Image
from matplotlib import pyplot as plt
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
tf.random.set_seed(22)
def save_images(imgs,name): #tile multiple 28x28 images into one 280x280 image
    new_im=Image.new("L",(280,280))
    index=0
    for i in range(0,280,28):
        for j in range(0,280,28):
            im=imgs[index]
            im=Image.fromarray(im,mode="L")
            new_im.paste(im,(i,j))
            index+=1
    new_im.save(name)
z_dim=10 #784=>10
batch_size=512
lr=1e-3
(x_train,y_train),(x_test,y_test)=datasets.fashion_mnist.load_data()
x_train,x_test=x_train.astype(np.float32)/255.,x_test.astype(np.float32)/255.
print(x_train.shape,y_train.shape)
#对训练集做处理
db_train=tf.data.Dataset.from_tensor_slices((x_train))
batchsize=512
db_train=db_train.shuffle(10000).batch(batchsize)
#对测试集做处理
db_test=tf.data.Dataset.from_tensor_slices((x_test))
db_test=db_test.batch(batchsize)
print(x_train.shape,y_train.shape)
class VAE(keras.Model):
    def __init__(self):
        super(VAE,self).__init__()
        #encoder
        self.fc1=layers.Dense(128)
        self.fc2=layers.Dense(z_dim) #predicts the mean
        self.fc3=layers.Dense(z_dim) #predicts the log variance
        #decoder
        self.fc4=layers.Dense(128)
        self.fc5=layers.Dense(784)
    def encoder(self,x):
        h=tf.nn.relu(self.fc1(x))
        #get mean
        mu=self.fc2(h)
        #get log variance: its range is (-inf,+inf), so the Dense output is unconstrained
        log_var=self.fc3(h)
        return mu,log_var
    def decoder(self,z):
        out=tf.nn.relu(self.fc4(z))
        out=self.fc5(out)
        return out
    def reparameterize(self,mu,log_var):
        eps=tf.random.normal(log_var.shape)
        std=tf.exp(log_var*0.5) #std=exp(log(sigma^2)/2)=sigma
        z=mu+std*eps
        return z
    def call(self,inputs,training=None):
        #[b,784]=>[b,z_dim],[b,z_dim]
        mu,log_var=self.encoder(inputs)
        #reparameterization trick
        z=self.reparameterize(mu,log_var)
        x_hat=self.decoder(z)
        return x_hat,mu,log_var
model=VAE()
model.build(input_shape=(4,784)) #concrete batch dim: reparameterize needs a static shape for tf.random.normal
optimizer=tf.optimizers.Adam(lr)
for epoch in range(1000):
    for step,x in enumerate(db_train):
        x=tf.reshape(x,[-1,784])
        with tf.GradientTape() as tape:
            x_rec_logits,mu,log_var=model(x)
            rec_loss=tf.losses.binary_crossentropy(x,x_rec_logits,from_logits=True)
            rec_loss=tf.reduce_sum(rec_loss)/x.shape[0]
            #compute kl divergence: (mu,var) vs N(0,1)
            kl_div=-0.5*(log_var+1-mu**2-tf.exp(log_var))
            kl_div=tf.reduce_sum(kl_div)/x.shape[0]
            loss=rec_loss+1.*kl_div
        grads=tape.gradient(loss,model.trainable_variables)
        optimizer.apply_gradients(zip(grads,model.trainable_variables))
        if step%100==0:
            print(epoch,step,"kl div",float(kl_div),"rec_loss",float(rec_loss))
    #evaluation: sample z from the prior to test the generative model
    z=tf.random.normal((batch_size,z_dim))
    logits=model.decoder(z)
    x_hat=tf.sigmoid(logits)
    x_hat=tf.reshape(x_hat,[-1,28,28]).numpy()*255.
    x_hat=x_hat.astype(np.uint8)
    save_images(x_hat,"autoencoder-images-vae/sample_epoch%d.png"%epoch)
    #reconstruct one test batch
    x=next(iter(db_test))
    x_hat_logits,_,_=model(tf.reshape(x,[-1,784]))
    x_hat=tf.sigmoid(x_hat_logits)
    x_hat=tf.reshape(x_hat,[-1,28,28]).numpy()*255.
    x_hat=x_hat.astype(np.uint8)
    save_images(x_hat,"autoencoder-images-vae/rec_epoch%d.png"%epoch)