创建 tensor
tf.constant()
tf.Variable()
tf.zeros()/tf.zeros_like()
tf.ones()/tf.fill()
查看 tensor 位置
tensor.device
查看维度数目
tensor.ndim
查看形状
tensor.shape
查看数据类型
tensor.dtype
查看形状和类型
tf.rank(tensor)
判断是否为tensor
tf.is_tensor(tensor)
转换数据类型
tf.cast(tensor, dtype)
list/numpy转换为tensor
tf.convert_to_tensor(numpy)
tensor 转化为 numpy
tensor.numpy()
均匀分布
tf.random.uniform()
正态分布
tf.random.normal()
截断2个标准差
tf.random.truncated_normal()
乱序数据
tf.random.shuffle()
索引,获取第i行第j列的数据
tensor[i][j] / tensor[i,j]
切片,倒序
tensor[::-1]
获取子tensor
tf.gather() / tf.gather_nd()
改变形状
tf.reshape()
交换维度
tf.transpose()
增加一个维度
tf.expand_dims() / tf.reshape()
从张量形状中移除大小为1的维度.
tf.squeeze() / tf.reshape()
广播
tf.broadcast_to()
复制数据
tf.tile() / tf.repeat()
拼接数据
tf.concat() ## 被拼接的维度可以相同或不同,其他维度必须相同
tf.stack() ## 维度必须相同
分割数据
tf.unstack() ## 指定维度,均分n份
tf.split() ## 指定维度和划分比例
范数 1/2
tf.norm()
均值/最大值/最小值
tf.reduce_mean()/max/min/sum
tensor.min() / tensor.max()
最大值的位置
tf.argmax()/argmin()
判断两变量的相同位置的值是否相同
tf.equal()
元素去重
tf.unique(tensor) ## 配合gather能够返回原值
编码
tf.one_hot()
排序
tf.sort() ## 返回排序后的元素
tf.argsort() ## 返回排序后元素的位置
返回最大的k个值
tf.math.top_k() ## 需要配合 indices、values
填充
tf.pad()
裁剪
tf.maximum() / tf.minimum()
tf.clip_by_norm ## 根据范数的值裁剪,只改变大小,不改变方向
tf.clip_by_global_norm() ## 各个分量同比例缩放
过滤
tf.boolean_mask() ## 筛选符合条件的数据,并返回一维
tf.where() ## 返回符合条件的元素索引/元素,配合gather_nd等同于boolean_mask()
根据indices将updates散布到新的(初始为零)张量.
根据索引对给定shape的零张量中的单个值或切片应用稀疏updates来创建新的张量.
此运算符是tf.gather_nd运算符的反函数,它从给定的张量中提取值或切片.
tf.scatter_nd(indices,updates,shape,name=None)
给定 N 个一维坐标数组 *args,返回 N 维坐标数组的列表输出,用于计算 N 维网格上的表达式.
tf.meshgrid(*args,**kwargs)
将 values 中的张量列表打包成一个张量,该张量比 values 中的每个张量都高一个秩,通过沿 axis 维度打包.
tf.stack(values,axis=0,name='stack')
返回一个 one-hot 张量.索引中由索引表示的位置取值 on_value,而所有其他位置都取值 off_value.
tf.one_hot(indices,depth,on_value=None,off_value=None,axis=None,dtype=None,name=None)
输出函数
## tf.keras.activations
* linear(...):线性激活函数(传递)。
* exponential(...):指数激活函数。
* tanh(...):双曲正切激活函数。
* sigmoid(...):Sigmoid激活函数,sigmoid(x)= 1 /(1 + exp(-x))。
* hard_sigmoid(...):更快逼近的Sigmoid激活函数。
* relu(...):应用整流线性单位激活功能。
* elu(...):指数线性单位。
* selu(...):比例指数线性单位(SELU)。
* softmax(...):Softmax将实向量转换为分类概率向量。
* softplus(...):Softplus激活函数,softplus(x)= log(exp(x)+1)。
* softsign(...):softsign(x)= x /(abs(x)+1)。Softsign 是 Tanh 激活函数的另一个替代选择.
* swish(...):swish激活函数,swish(x)= x * Sigmoid(x)。
损失函数
## tf.keras.losses
* KLD(...):计算y_true和y_pred之间的Kullback-Leibler散度损失。
* MAE(...):计算标签和预测之间的平均绝对误差。
* MAPE(...):计算y_true和y_pred之间的平均绝对百分比误差。
* MSE(...):计算标签和预测之间的均方误差。
* MSLE(...):计算y_true和y_pred之间的均方对数误差。
* binary_crossentropy(...):计算二分类交叉熵损失。
* categorical_crossentropy(...):计算多分类交叉熵损失。
* sparse_categorical_crossentropy(...):计算稀疏的分类交叉熵损失。
* categorical_hinge(...):计算y_true和y_pred之间的分类铰链损耗。
* squared_hinge(...):计算y_true和y_pred之间的平方铰链损耗。
* cosine_similarity(...):计算标签和预测之间的余弦相似度。
* huber(...):计算Huber损失值。
* poisson(...):计算y_true和y_pred之间的泊松损失。
* serialize(...):序列化损失函数或损失实例。
自动求导
with tf.GradientTape(persistent=True) as tape:
pass
上采样
tf.keras.layers.UpSampling2D()
保存/加载模型
network.save_weights() network.load_weights() ## 只保存权重
network.save() tf.keras.models.load_model() ## 保存所有的状态
tf.saved_model.save() tf.saved_model.load() ## 保存模型,其他语言也适用
过拟合
* 交叉验证
* 正则化
* 学习率
* dropout 训练时候需要dropout 测试的时候恢复全连接网络
* 池化。GAP和GMP都是将把每个feature map和类别输出进行了关联,而不是feature map的unit直接和类别输出进行关联。
差别在于,GMP只取每个feature map中的最重要的region,这样会导致,一个feature map中哪怕只有一个region是和某个类相关的,
这个feature map都会对最终的预测产生很大的影响。而GAP则是每个region都进行了考虑,这样可以保证不会被一两个很特殊的region干扰。
填充序列
keras.preprocessing.sequence.pad_sequences(sequences, maxlen=None,dtype='int32',padding='pre',truncating='pre', value=0.)
keras只能接受长度相同的序列输入。因此如果目前序列长度参差不齐,这时需要使用pad_sequences()。
该函数是将序列转化为经过填充以后的长度相同的新序列。
2. 网络练习
2.1 搭建dnn网络
# -*- coding: utf-8 -*-
'''
@Time : 2020/8/29 20:04
@Author : litao
@FileName: test.py
@Software: PyCharm
'''
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # silence non-error TF C++ logs ('2' = print errors only)
def load_data():
    """Load raw MNIST and print shape/dtype/value-range diagnostics.

    :return: (x, y, x_test, y_test) — train/test features and labels.
    """
    # x: [60k, 28, 28]; y: [60k]
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
    print('x_y_train_shape : \n', x_train.shape, y_train.shape)
    print('x_y_test_shape : \n', x_test.shape, y_test.shape)
    print('x_y_train_dtype : \n', x_train.dtype, y_train.dtype)
    print('x_y_test_dtype : \n', x_test.dtype, y_test.dtype)
    print('x_train_min_max : \n', tf.reduce_min(x_train), tf.reduce_max(x_train))
    print('y_train_min_max : \n', tf.reduce_min(y_train), tf.reduce_max(y_train))
    print('x_test_min_max : \n', tf.reduce_min(x_test), tf.reduce_max(x_test))
    print('y_test_min_max : \n', tf.reduce_min(y_test), tf.reduce_max(y_test))
    return x_train, y_train, x_test, y_test
def prepare_data(x, y, x_test, y_test):
    """Normalize features to [0, 1], cast labels to int32, and batch.

    :param x: training features
    :param y: training labels
    :param x_test: test features
    :param y_test: test labels
    :return: (train_db, test_db) batched tf.data pipelines.
    """
    batch_size = 128
    # Scale pixel values from [0, 255] down to [0, 1].
    x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
    x_test = tf.convert_to_tensor(x_test, dtype=tf.float32) / 255.
    y = tf.convert_to_tensor(y, dtype=tf.int32)
    y_test = tf.convert_to_tensor(y_test, dtype=tf.int32)
    train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
    test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)
    return train_db, test_db
def dnn_net(x, y, init_para):
    """Hand-written 3-layer MLP forward pass with MSE loss.

    :param x: flattened images, shape [b, 784]
    :param y: one-hot labels, shape [b, 10]
    :param init_para: [w1, b1, w2, b2, w3, b3] tf.Variables
    :return: (loss, w1, b1, w2, b2, w3, b3)
    """
    w1, b1, w2, b2, w3, b3 = init_para
    # Layer 1: [b, 784] @ [784, 256] + [256] => [b, 256]
    hidden1 = tf.nn.relu(x @ w1 + tf.broadcast_to(b1, [x.shape[0], 256]))
    # Layer 2: [b, 256] => [b, 128]
    hidden2 = tf.nn.relu(hidden1 @ w2 + b2)
    # Output layer: [b, 128] => [b, 10]
    out = hidden2 @ w3 + b3
    # mse = mean((y - out)^2)
    loss = tf.reduce_mean(tf.square(y - out))
    return loss, w1, b1, w2, b2, w3, b3
def train_net(epoch_number, train_db, test_db, init_para=None, learning_rate=1e-3):
    """Train the hand-written 3-layer MLP and evaluate after every epoch.

    FIX: the original body read `init_para` and `learning_rate`, names that
    exist only as locals of main(), so any call raised NameError. Both are
    now parameters; the defaults keep the old 3-argument call working by
    initializing fresh weights here.

    :param epoch_number: number of passes over train_db
    :param train_db: batched (x, y) training iterator
    :param test_db: batched (x, y) test iterator
    :param init_para: [w1, b1, w2, b2, w3, b3] tf.Variables, or None to
                      initialize fresh parameters
    :param learning_rate: SGD step size
    """
    if init_para is None:
        # [b, 784] => [b, 256] => [b, 128] => [b, 10]
        w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
        b1 = tf.Variable(tf.zeros([256]))
        w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
        b2 = tf.Variable(tf.zeros([128]))
        w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
        b3 = tf.Variable(tf.zeros([10]))
        init_para = [w1, b1, w2, b2, w3, b3]
    for epoch in range(epoch_number):
        for step, (x, y) in enumerate(train_db):
            # x: [128, 28, 28] -> [128, 784]; y: [128] -> one-hot [128, 10]
            x = tf.reshape(x, [-1, 28*28])
            y = tf.one_hot(y, depth=10)
            with tf.GradientTape() as tape:  # watches tf.Variable by default
                loss, w1, b1, w2, b2, w3, b3 = dnn_net(x, y, init_para)
            grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
            # assign_sub updates in place and keeps the tf.Variable type;
            # `w1 = w1 - lr*g` would rebind to a plain tf.Tensor and break
            # the next iteration.
            w1.assign_sub(learning_rate * grads[0])
            b1.assign_sub(learning_rate * grads[1])
            w2.assign_sub(learning_rate * grads[2])
            b2.assign_sub(learning_rate * grads[3])
            w3.assign_sub(learning_rate * grads[4])
            b3.assign_sub(learning_rate * grads[5])
            if step % 100 == 0:
                print(epoch, step, 'loss:', float(loss))
        # Evaluate with the current (latest) [w1, b1, w2, b2, w3, b3].
        total_correct, total_number = 0, 0
        for step, (x_test, y_test) in enumerate(test_db):
            x_test = tf.reshape(x_test, [-1, 28 * 28])  # [b, 28, 28] => [b, 784]
            # [b, 784] => [b, 256] => [b, 128] => [b, 10]
            h1 = tf.nn.relu(x_test @ w1 + b1)
            h2 = tf.nn.relu(h1 @ w2 + b2)
            out = h2 @ w3 + b3
            # Map real-valued logits to probabilities over the 10 classes.
            pred = tf.nn.softmax(out, axis=1)
            # Predicted class = argmax over the class axis: [b, 10] => [b]
            pred = tf.cast(tf.argmax(pred, axis=1), dtype=tf.int32)
            correct = tf.cast(tf.equal(pred, y_test), dtype=tf.int32)
            total_correct += int(tf.reduce_sum(correct))  # correctly classified
            total_number += x_test.shape[0]               # samples seen
        acc = total_correct / total_number
        print("test acc: ", acc)
def main():
    """Wire together data loading, preprocessing, and training."""
    # Hyper-parameters.
    learning_rate = 1e-3
    epoch_number = 10  # passes over the training set
    # Parameter initialization: [b, 784] => [b, 256] => [b, 128] => [b, 10]
    w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
    b1 = tf.Variable(tf.zeros([256]))
    w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
    b2 = tf.Variable(tf.zeros([128]))
    w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
    b3 = tf.Variable(tf.zeros([10]))
    init_para = [w1, b1, w2, b2, w3, b3]
    # Load and preprocess the data.
    x, y, x_test, y_test = load_data()
    train_db, test_db = prepare_data(x, y, x_test, y_test)
    # Peek at one batch to sanity-check the shapes.
    sample = next(iter(train_db))
    print('sample:\n', sample[0].shape, sample[1].shape)
    # Train.
    train_net(epoch_number, train_db, test_db)


if __name__ == '__main__':
    main()
2.2 ResNet18
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Sequential
class BasicBlock(layers.Layer):
    """Two-conv residual basic block (ResNet-18/34 style)."""

    def __init__(self, filter_num, stride=1):
        super(BasicBlock, self).__init__()
        # First 3x3 conv may downsample spatially via `stride`.
        self.conv1 = layers.Conv2D(filter_num, (3, 3), strides=stride, padding='same')
        self.bn1 = layers.BatchNormalization()
        self.relu = layers.Activation('relu')
        # Second 3x3 conv keeps the spatial size.
        self.conv2 = layers.Conv2D(filter_num, (3, 3), strides=1, padding='same')
        self.bn2 = layers.BatchNormalization()
        if stride != 1:
            # 1x1 conv on the shortcut so the identity matches the main path.
            self.downsample = Sequential()
            self.downsample.add(layers.Conv2D(filter_num, (1, 1), strides=stride))
        else:
            self.downsample = lambda x: x

    def call(self, inputs, training=None):
        # inputs: [b, h, w, c]
        # FIX: forward `training` to BatchNormalization so it uses batch
        # statistics in training and moving averages at inference (the
        # original never passed it from a custom training loop).
        out = self.conv1(inputs)
        out = self.bn1(out, training=training)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out, training=training)
        identity = self.downsample(inputs)
        output = layers.add([out, identity])
        output = tf.nn.relu(output)
        return output
# Res Block module. Subclassing keras.Model (or keras.Layer) both work.
class ResNet(keras.Model):
    """ResNet backbone built from four stages of stacked BasicBlocks.

    :param layer_dims: blocks per stage, e.g. [2, 2, 2, 2] for ResNet-18
    :param num_classes: size of the final dense (classification) output
    """

    def __init__(self, layer_dims, num_classes=100):  # [2, 2, 2, 2]
        super(ResNet, self).__init__()
        # Stem / preprocessing layers (a MaxPool2D could also be tuned here).
        self.stem = Sequential([layers.Conv2D(64, (3, 3), strides=(1, 1), padding='valid'),
                                layers.BatchNormalization(),
                                layers.Activation('relu'),
                                layers.MaxPool2D(pool_size=(2, 2), strides=(1, 1), padding='same')
                                ])
        # Four residual stages; stages 2-4 downsample with stride=2.
        self.layer1 = self.build_resblock(64, layer_dims[0])
        self.layer2 = self.build_resblock(128, layer_dims[1], stride=2)
        self.layer3 = self.build_resblock(256, layer_dims[2], stride=2)
        self.layer4 = self.build_resblock(512, layer_dims[3], stride=2)
        # Global average pooling keeps the parameter count low: => [b, 512]
        self.gap = layers.GlobalAveragePooling2D()
        self.fc = layers.Dense(num_classes)

    def call(self, inputs, training=None):
        # FIX: propagate `training` so BatchNormalization inside the stem
        # and the residual stages switches between train/inference modes.
        x = self.stem(inputs, training=training)
        x = self.layer1(x, training=training)
        x = self.layer2(x, training=training)
        x = self.layer3(x, training=training)
        x = self.layer4(x, training=training)
        x = self.gap(x)  # [b, c]
        x = self.fc(x)   # [b, num_classes]
        return x

    def build_resblock(self, filter_num, blocks, stride=1):
        """Stack `blocks` BasicBlocks; only the first may downsample."""
        res_blocks = Sequential()
        res_blocks.add(BasicBlock(filter_num, stride))
        for _ in range(1, blocks):
            res_blocks.add(BasicBlock(filter_num, stride=1))
        return res_blocks
def resnet18(num_classes=100):
    """ResNet-18: 4 stages x 2 BasicBlocks. `num_classes` sizes the head
    (default 100 preserves the original behavior)."""
    return ResNet([2, 2, 2, 2], num_classes=num_classes)
def resnet34(num_classes=100):
    """ResNet-34: stages of [3, 4, 6, 3] BasicBlocks. `num_classes` sizes
    the head (default 100 preserves the original behavior)."""
    return ResNet([3, 4, 6, 3], num_classes=num_classes)
import tensorflow as tf
from tensorflow.keras import layers, optimizers, datasets, Sequential
import os
# from resnet import resnet18
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'  # silence non-error TF C++ logs
tf.random.set_seed(2345)  # fixed seed for reproducible shuffling/initialization
def preprocess(x, y):
    """Map uint8 pixels into floats in [-0.5, 0.5]; cast labels to int32."""
    return (tf.cast(x, dtype=tf.float32) / 255. - 0.5,
            tf.cast(y, dtype=tf.int32))
def load_data():
    """Load CIFAR-100, drop the extra label axis, build batched pipelines."""
    (x, y), (x_test, y_test) = datasets.cifar100.load_data()
    # Labels arrive as [b, 1]; squeeze the size-1 axis to get [b].
    y = tf.squeeze(y, axis=1)
    y_test = tf.squeeze(y_test, axis=1)
    print(x.shape, y.shape, x_test.shape, y_test.shape)
    train_db = (tf.data.Dataset.from_tensor_slices((x, y))
                .shuffle(1000).map(preprocess).batch(512))
    test_db = (tf.data.Dataset.from_tensor_slices((x_test, y_test))
               .map(preprocess).batch(512))
    return train_db, test_db
def main(train_db, test_db):
    """Train resnet18 on CIFAR-100 and report test accuracy per epoch.

    :param train_db: batched (x, y) training pipeline
    :param test_db: batched (x, y) test pipeline
    """
    # [b, 32, 32, 3] => [b, 100]
    model = resnet18()
    model.build(input_shape=(None, 32, 32, 3))
    model.summary()
    # FIX: `lr` is a deprecated alias in tf.keras optimizers — use
    # `learning_rate` explicitly.
    optimizer = optimizers.Adam(learning_rate=1e-3)
    for epoch in range(5):
        for step, (x, y) in enumerate(train_db):
            with tf.GradientTape() as tape:
                # FIX: pass training=True so BatchNormalization layers use
                # batch statistics during training.
                logits = model(x, training=True)  # [b, 32, 32, 3] => [b, 100]
                y_onehot = tf.one_hot(y, depth=100)  # [b] => [b, 100]
                loss = tf.reduce_mean(tf.losses.categorical_crossentropy(
                    y_onehot, logits, from_logits=True))
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            if step % 50 == 0:
                print(epoch, step, 'loss:', float(loss))
        # Evaluate on the test set with the current weights.
        total_num = 0
        total_correct = 0
        for x, y in test_db:
            # FIX: training=False for inference-mode BatchNormalization.
            logits = model(x, training=False)
            prob = tf.nn.softmax(logits, axis=1)
            pred = tf.cast(tf.argmax(prob, axis=1), dtype=tf.int32)
            correct = tf.reduce_sum(tf.cast(tf.equal(pred, y), dtype=tf.int32))
            total_num += x.shape[0]
            total_correct += int(correct)
        acc = total_correct / total_num
        print(epoch, 'acc:', acc)
if __name__ == '__main__':
    train_db, test_db = load_data()
    # Peek at one batch to sanity-check shapes and value range.
    sample = next(iter(train_db))
    print('sample:', sample[0].shape, sample[1].shape,
          tf.reduce_min(sample[0]), tf.reduce_max(sample[0]))
    main(train_db, test_db)
2.3 互联网电影评语
# sentiment_analysis_layer
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
tf.random.set_seed(22)  # reproducible TF random ops
np.random.seed(22)  # reproducible numpy random ops
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # silence non-error TF C++ logs
assert tf.__version__.startswith('2.')  # this script relies on TF2 eager APIs
def load_data(batch_size, total_words, max_review_len):
    """Load IMDB reviews, pad/truncate to a fixed length, and batch.

    :param batch_size: samples per batch
    :param total_words: vocabulary size (only most common ids kept)
    :param max_review_len: fixed sequence length after padding
    :return: (db_train, db_test) batched pipelines.
    """
    (x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)
    # Keras layers need equal-length sequences: pad/cut to [b, 80].
    x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
    x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)
    db_train = (tf.data.Dataset.from_tensor_slices((x_train, y_train))
                .shuffle(1000).batch(batch_size, drop_remainder=True))
    db_test = (tf.data.Dataset.from_tensor_slices((x_test, y_test))
               .batch(batch_size, drop_remainder=True))
    print('x_train shape:\n', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
    print('x_test shape:\n', x_test.shape)
    return db_train, db_test
class MyRNN(keras.Model):
    """Two-layer SimpleRNN sentiment classifier over padded word-id sequences."""

    def __init__(self, output_dim, total_words, embedding_len, max_review_len):
        super(MyRNN, self).__init__()
        # Embedding maps word ids to fixed-size dense vectors:
        # [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words, embedding_len,
                                          input_length=max_review_len)
        # Two stacked SimpleRNN layers, hidden dim = output_dim.
        self.rnn = keras.Sequential([
            layers.SimpleRNN(output_dim, dropout=0.5, return_sequences=True, unroll=True),
            layers.SimpleRNN(output_dim, dropout=0.5, unroll=True)
        ])
        # fc head: [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """
        Forward operation.
        net(x) / net(x, training=True): train mode
        net(x, training=False): test mode
        :param inputs: [b, 80] — b sentences, 80 word ids each
        :param training: train/inference flag
        :return: probability that each review is positive, shape [b, 1]
        """
        x = self.embedding(inputs)  # [b, 80] => [b, 80, 100]
        # FIX: forward `training` so the RNN dropout (dropout=0.5) is active
        # only during training and disabled at inference.
        x = self.rnn(x, training=training)  # [b, 80, 100] => [b, 64]
        x = self.outlayer(x)  # [b, 64] => [b, 1]
        prob = tf.sigmoid(x)  # p(y is pos | x)
        return prob
def main():
    """Build, train, and evaluate the RNN sentiment model."""
    batch_size = 128       # samples per batch
    total_words = 10000    # vocabulary size: only the most common ids are kept
    max_review_len = 80    # maximum words per review after padding
    embedding_len = 100    # embedding vector size per word
    output_dim = 64        # RNN output dimensionality
    epochs = 4             # passes over the training set
    db_train, db_test = load_data(batch_size, total_words, max_review_len)
    model = MyRNN(output_dim, total_words, embedding_len, max_review_len)
    model.compile(optimizer=keras.optimizers.Adam(0.001),
                  loss=tf.losses.BinaryCrossentropy(),
                  metrics=['accuracy'])
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    model.evaluate(db_test)


if __name__ == '__main__':
    main()
2.4 autoencoder
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import Sequential, layers
from PIL import Image
from matplotlib import pyplot as plt
tf.random.set_seed(22)  # reproducible TF random ops
np.random.seed(22)  # reproducible numpy random ops
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # silence non-error TF C++ logs
assert tf.__version__.startswith('2.')  # requires TF2 eager APIs
def save_images(imgs, name):
    """Tile 100 grayscale 28x28 images into a 280x280 grid, save to `name`."""
    canvas = Image.new('L', (280, 280))
    index = 0
    for col in range(0, 280, 28):
        for row in range(0, 280, 28):
            tile = Image.fromarray(imgs[index], mode='L')
            canvas.paste(tile, (col, row))
            index += 1
    canvas.save(name)
class AE(keras.Model):
    """Plain autoencoder: 784 -> h_dim bottleneck -> 784 pixel logits."""

    def __init__(self, h_dim):
        super(AE, self).__init__()
        # Encoder: 784 -> 256 -> 128 -> h_dim
        self.encoder = Sequential([
            layers.Dense(256, activation=tf.nn.relu),
            layers.Dense(128, activation=tf.nn.relu),
            layers.Dense(h_dim)
        ])
        # Decoder: h_dim -> 128 -> 256 -> 784
        self.decoder = Sequential([
            layers.Dense(128, activation=tf.nn.relu),
            layers.Dense(256, activation=tf.nn.relu),
            layers.Dense(784)
        ])

    def call(self, inputs, training=None):
        """Forward pass: compress to h_dim, then reconstruct 784 logits."""
        code = self.encoder(inputs)   # [b, 784] => [b, h_dim]
        x_hat = self.decoder(code)    # [b, h_dim] => [b, 784]
        return x_hat
def load_data(batchsz):
    """Load Fashion-MNIST images scaled to [0, 1]; labels are not batched
    (an autoencoder needs no targets)."""
    (x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
    x_train, x_test = x_train.astype(np.float32) / 255., x_test.astype(np.float32) / 255.
    train_db = (tf.data.Dataset.from_tensor_slices(x_train)
                .shuffle(batchsz * 5)
                .batch(batchsz))
    test_db = tf.data.Dataset.from_tensor_slices(x_test).batch(batchsz)
    print(x_train.shape, y_train.shape)
    print(x_test.shape, y_test.shape)
    return train_db, test_db
def main(train_db=None, test_db=None):
    """Train the autoencoder on Fashion-MNIST and save reconstruction grids.

    FIX: the parameters now default to None because the module guard calls
    main() with no arguments — the original signature made that call a
    TypeError. The datasets are (re)loaded below in either case, so the
    parameters exist only for backward compatibility.
    """
    h_dim = 20      # bottleneck dimension
    batchsz = 512   # samples per batch
    lr = 1e-3
    train_db, test_db = load_data(batchsz)
    model = AE(h_dim)
    model.build(input_shape=(None, 784))
    model.summary()
    optimizer = tf.optimizers.Adam(learning_rate=lr)
    for epoch in range(100):
        for step, x in enumerate(train_db):
            x = tf.reshape(x, [-1, 784])  # [b, 28, 28] => [b, 784]
            with tf.GradientTape() as tape:
                x_rec_logits = model(x)
                # Per-pixel binary cross-entropy between input and logits.
                rec_loss = tf.reduce_mean(tf.losses.binary_crossentropy(
                    x, x_rec_logits, from_logits=True))
            grads = tape.gradient(rec_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            if step % 100 == 0:
                print(epoch, step, float(rec_loss))
        # Evaluation: reconstruct one test batch and dump it as a PNG grid.
        x = next(iter(test_db))
        logits = model(tf.reshape(x, [-1, 784]))
        x_hat = tf.sigmoid(logits)
        x_hat = tf.reshape(x_hat, [-1, 28, 28])  # [b, 784] => [b, 28, 28]
        x_concat = tf.concat([x, x_hat], axis=0)  # => [2b, 28, 28]
        # save_images(x_concat, 'RunFile/ae_images/concat_rec_epoch_%d.png'%epoch)
        x_hat = x_hat.numpy() * 255.
        x_hat = x_hat.astype(np.uint8)
        save_images(x_hat, 'RunFile/ae_images/rec_epoch_%d.png'%epoch)


if __name__ == '__main__':
    main()
2.5 Variational Auto-Encoder
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import Sequential, layers
from PIL import Image
from matplotlib import pyplot as plt
tf.random.set_seed(22)  # reproducible TF random ops
np.random.seed(22)  # reproducible numpy random ops
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # silence non-error TF C++ logs
assert tf.__version__.startswith('2.')  # requires TF2 eager APIs
def save_images(imgs, name):
    """Tile 100 grayscale 28x28 images into a 280x280 grid, save to `name`."""
    canvas = Image.new('L', (280, 280))
    index = 0
    for col in range(0, 280, 28):
        for row in range(0, 280, 28):
            tile = Image.fromarray(imgs[index], mode='L')
            canvas.paste(tile, (col, row))
            index += 1
    canvas.save(name)
def load_data(batchsz):
    """Load Fashion-MNIST images scaled to [0, 1]; labels are not batched
    (the VAE needs no targets)."""
    (x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
    x_train, x_test = x_train.astype(np.float32) / 255., x_test.astype(np.float32) / 255.
    train_db = (tf.data.Dataset.from_tensor_slices(x_train)
                .shuffle(batchsz * 5)
                .batch(batchsz))
    test_db = tf.data.Dataset.from_tensor_slices(x_test).batch(batchsz)
    print(x_train.shape, y_train.shape)
    print(x_test.shape, y_test.shape)
    return train_db, test_db
class VAE(keras.Model):
    """Variational autoencoder: 784 -> z_dim latent Gaussian -> 784 logits."""

    def __init__(self, z_dim):
        super(VAE, self).__init__()
        # Encoder layers.
        self.fc1 = layers.Dense(128)
        self.fc2 = layers.Dense(z_dim)  # predicts the latent mean
        self.fc3 = layers.Dense(z_dim)  # predicts the latent log-variance
        # Decoder layers.
        self.fc4 = layers.Dense(128)
        self.fc5 = layers.Dense(784)

    def encoder(self, x):
        """Map inputs to the posterior parameters (mu, log_var)."""
        h = tf.nn.relu(self.fc1(x))
        return self.fc2(h), self.fc3(h)

    def decoder(self, z):
        """Map latent codes back to 784 pixel logits."""
        hidden = tf.nn.relu(self.fc4(z))
        return self.fc5(hidden)

    def reparameterize(self, mu, log_var):
        """Sample z = mu + sigma * eps, eps ~ N(0, I) (reparameterization trick)."""
        eps = tf.random.normal(log_var.shape)
        sigma = tf.exp(log_var * 0.5)
        return mu + sigma * eps

    def call(self, inputs, training=None):
        """Forward pass: encode, sample a latent code, decode."""
        mu, log_var = self.encoder(inputs)  # [b, 784] => 2 x [b, z_dim]
        z = self.reparameterize(mu, log_var)
        x_hat = self.decoder(z)
        return x_hat, mu, log_var
def main():
    """Train the VAE on Fashion-MNIST; save sampled and reconstructed grids."""
    batchsz = 512
    lr = 1e-3
    z_dim = 10
    # FIX: the original referenced train_db/test_db without ever creating
    # them (NameError at the first loop); load the datasets here. The
    # unused h_dim constant was dropped.
    train_db, test_db = load_data(batchsz)
    model = VAE(z_dim)
    model.build(input_shape=(4, 784))
    optimizer = tf.optimizers.Adam(lr)
    for epoch in range(1000):
        for step, x in enumerate(train_db):
            x = tf.reshape(x, [-1, 784])
            with tf.GradientTape() as tape:
                x_rec_logits, mu, log_var = model(x)
                # Per-pixel reconstruction loss, summed then averaged per sample.
                rec_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=x, logits=x_rec_logits)
                rec_loss = tf.reduce_sum(rec_loss) / x.shape[0]
                # KL( N(mu, var) || N(0, 1) ), closed form; see
                # https://stats.stackexchange.com/questions/7440/kl-divergence-between-two-univariate-gaussians
                kl_div = -0.5 * (log_var + 1 - mu**2 - tf.exp(log_var))
                kl_div = tf.reduce_sum(kl_div) / x.shape[0]
                loss = rec_loss + 1. * kl_div
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            if step % 100 == 0:
                print(epoch, step, 'kl div:', float(kl_div), 'rec loss:', float(rec_loss))
        # Evaluation 1: decode random latent codes z ~ N(0, 1).
        z = tf.random.normal((batchsz, z_dim))
        logits = model.decoder(z)
        x_hat = tf.sigmoid(logits)
        x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() * 255.
        x_hat = x_hat.astype(np.uint8)
        save_images(x_hat, 'RunFile/vae_images/sampled_epoch%d.png'%epoch)
        # Evaluation 2: reconstruct one test batch.
        x = next(iter(test_db))
        x = tf.reshape(x, [-1, 784])
        x_hat_logits, _, _ = model(x)
        x_hat = tf.sigmoid(x_hat_logits)
        x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() * 255.
        x_hat = x_hat.astype(np.uint8)
        save_images(x_hat, 'RunFile/vae_images/rec_epoch%d.png'%epoch)


if __name__ == '__main__':
    main()
2.6 DC-GAN
- 卷积层后面添加批归一化,激活函数不采用relu(对于小于0的,易梯度离散),可以使用leaky_relu
- 输出函数,不采用sigmoid(将实数转化为(0,1),对于gan网络不稳定),通常使用tanh.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
class Generator(keras.Model):
    """Transposed-conv generator: latent z [b, 100] -> RGB image [b, 64, 64, 3].

    Upsampling path (decoder-like): channel count shrinks while height and
    width grow; the strides are tuned so the final output is exactly 64x64x3.
    Kernel sizes stay small (< 7).
    """

    def __init__(self):
        # Initialize the parent keras.Model machinery first.
        super(Generator, self).__init__()
        # z: [b, 100] => [b, 3*3*512] => [b, 3, 3, 512] => [b, 64, 64, 3]
        self.fc = layers.Dense(3*3*512)
        self.conv1 = layers.Conv2DTranspose(filters=256, kernel_size=3,
                                            strides=3, padding='valid')
        self.bn1 = layers.BatchNormalization()
        self.conv2 = layers.Conv2DTranspose(128, 5, 2, 'valid')
        self.bn2 = layers.BatchNormalization()
        self.conv3 = layers.Conv2DTranspose(3, 4, 3, 'valid')  # RGB output

    def call(self, inputs, training=None):
        net = self.fc(inputs)  # [b, 100] => [b, 3*3*512]
        net = tf.nn.leaky_relu(tf.reshape(net, [-1, 3, 3, 512]))  # (b, 3, 3, 512)
        net = tf.nn.leaky_relu(self.bn1(self.conv1(net), training=training))  # (b, 9, 9, 256)
        net = tf.nn.leaky_relu(self.bn2(self.conv2(net), training=training))  # (b, 21, 21, 128)
        net = self.conv3(net)  # (b, 64, 64, 3)
        return tf.tanh(net)    # values in (-1, 1)
class Discriminator(keras.Model):
    """Conv net mapping a [b, 64, 64, 3] image to one real/fake logit."""

    def __init__(self):
        super(Discriminator, self).__init__()
        # [b, 64, 64, 3] => [b, 1]
        self.conv1 = layers.Conv2D(filters=64, kernel_size=5, strides=3, padding='valid')
        self.conv2 = layers.Conv2D(128, 5, 3, 'valid')
        self.bn2 = layers.BatchNormalization()
        self.conv3 = layers.Conv2D(256, 5, 3, 'valid')
        self.bn3 = layers.BatchNormalization()
        self.flatten = layers.Flatten()  # [b, h, w, c] => [b, h*w*c]
        self.fc = layers.Dense(1)        # binary decision => single logit

    def call(self, inputs, training=None):
        features = tf.nn.leaky_relu(self.conv1(inputs))  # => (b, 20, 20, 64)
        features = tf.nn.leaky_relu(self.bn2(self.conv2(features), training=training))  # => (b, 6, 6, 128)
        features = tf.nn.leaky_relu(self.bn3(self.conv3(features), training=training))  # => (b, 1, 1, 256)
        flat = self.flatten(features)
        logits = self.fc(flat)  # [b, 256] => [b, 1]
        return logits
def main():
    """Smoke-test the GAN parts with random tensors and print the results."""
    d = Discriminator()
    # The generator output shape must match the discriminator input shape.
    g = Generator()
    x = tf.random.normal([2, 64, 64, 3])
    z = tf.random.normal([2, 100])
    prob = d(x)
    print(prob)
    x_hat = g(z)
    print(x_hat.shape)


if __name__ == '__main__':
    main()
2.7 WGAN-GP
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
# NOTE(review): scipy.misc.toimage was removed in newer SciPy releases —
# confirm the pinned SciPy version, or replace with PIL.Image.fromarray.
from scipy.misc import toimage
import glob
from gan import Generator, Discriminator
from dataset import make_anime_dataset
tf.random.set_seed(22)  # reproducible TF random ops
np.random.seed(22)  # reproducible numpy random ops
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # silence non-error TF C++ logs
assert tf.__version__.startswith('2.')  # requires TF2 eager APIs
def save_result(val_out, val_block_size, image_path, color_mode):
    """Tile a batch of generator outputs in [-1, 1] into a grid image and save it.

    :param val_out: [b, h, w, c] float array with values in [-1, 1]
    :param val_block_size: images per grid row
    :param image_path: destination file path
    :param color_mode: kept for interface compatibility (unused here)
    """
    def to_uint8(img):
        # [-1, 1] floats => [0, 255] uint8
        return ((img + 1.0) * 127.5).astype(np.uint8)

    pixels = to_uint8(val_out)
    final_image = np.array([])
    single_row = np.array([])
    for b in range(val_out.shape[0]):
        # Append the b-th image to the current row.
        if single_row.size == 0:
            single_row = pixels[b, :, :, :]
        else:
            single_row = np.concatenate((single_row, pixels[b, :, :, :]), axis=1)
        # After val_block_size images, append the row to the grid and reset.
        if (b + 1) % val_block_size == 0:
            if final_image.size == 0:
                final_image = single_row
            else:
                final_image = np.concatenate((final_image, single_row), axis=0)
            single_row = np.array([])
    if final_image.shape[2] == 1:
        # Drop the trailing channel axis for single-channel images.
        final_image = np.squeeze(final_image, axis=2)
    toimage(final_image).save(image_path)
def celoss_ones(logits):
    """Mean sigmoid cross-entropy of `logits` against all-ones targets."""
    targets = tf.ones_like(logits)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=targets)
    return tf.reduce_mean(loss)
def celoss_zeros(logits):
    """Mean sigmoid cross-entropy of `logits` against all-zeros targets."""
    targets = tf.zeros_like(logits)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=targets)
    return tf.reduce_mean(loss)
def gradient_penalty(discriminator, batch_x, fake_image):
    """WGAN-GP gradient penalty: E[(||grad D(x_interp)||_2 - 1)^2].

    :param discriminator: the critic network
    :param batch_x: real images [b, h, w, c]
    :param fake_image: generated images, same shape as batch_x
    :return: scalar penalty term
    """
    batchsz = batch_x.shape[0]
    # One interpolation coefficient per sample, broadcast to the image shape.
    coeff = tf.random.uniform([batchsz, 1, 1, 1])
    coeff = tf.broadcast_to(coeff, batch_x.shape)
    interplate = coeff * batch_x + (1 - coeff) * fake_image
    with tf.GradientTape() as tape:
        # Interpolated images are plain tensors — watch them explicitly.
        tape.watch([interplate])
        d_interplote_logits = discriminator(interplate)
    grads = tape.gradient(d_interplote_logits, interplate)
    # Per-sample gradient norm: [b, h, w, c] => [b, -1] => [b]
    grads = tf.reshape(grads, [grads.shape[0], -1])
    gp = tf.norm(grads, axis=1)
    return tf.reduce_mean((gp - 1) ** 2)
def d_loss_fn(generator, discriminator, batch_z, batch_x, is_training):
    """Critic loss: score real images as real, generated ones as fake,
    plus the WGAN-GP gradient penalty (the difference vs plain DCGAN)."""
    fake_image = generator(batch_z, is_training)
    d_fake_logits = discriminator(fake_image, is_training)
    d_real_logits = discriminator(batch_x, is_training)
    gp = gradient_penalty(discriminator, batch_x, fake_image)
    # Penalty weight 1.0.
    loss = celoss_zeros(d_fake_logits) + celoss_ones(d_real_logits) + 1. * gp
    return loss, gp
def g_loss_fn(generator, discriminator, batch_z, is_training):
    """Generator loss: make the critic score generated images as real."""
    fake_image = generator(batch_z, is_training)
    d_fake_logits = discriminator(fake_image, is_training)
    return celoss_ones(d_fake_logits)
def main():
    """WGAN-GP training loop on the anime-faces dataset."""
    # Hyper-parameters.
    z_dim = 100
    epochs = 3000000
    batch_size = 512
    learning_rate = 0.002
    is_training = True
    # Build the input pipeline.
    img_path = glob.glob(r'./data/faces/*.jpg')
    dataset, img_shape, _ = make_anime_dataset(img_path, batch_size)
    print(dataset, img_shape)
    sample = next(iter(dataset))
    print(sample.shape, tf.reduce_max(sample).numpy(),
          tf.reduce_min(sample).numpy())
    dataset = dataset.repeat()
    db_iter = iter(dataset)
    # Models and optimizers; the generator output shape must match the
    # discriminator input shape.
    generator = Generator()
    generator.build(input_shape=(None, z_dim))
    discriminator = Discriminator()
    discriminator.build(input_shape=(None, 64, 64, 3))
    g_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)
    d_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)
    for epoch in range(epochs):
        batch_z = tf.random.uniform([batch_size, z_dim], minval=-1., maxval=1.)
        batch_x = next(db_iter)
        # Train the discriminator (critic).
        with tf.GradientTape() as tape:
            d_loss, gp = d_loss_fn(generator, discriminator, batch_z, batch_x, is_training)
        grads = tape.gradient(d_loss, discriminator.trainable_variables)
        d_optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))
        # Train the generator.
        with tf.GradientTape() as tape:
            g_loss = g_loss_fn(generator, discriminator, batch_z, is_training)
        grads = tape.gradient(g_loss, generator.trainable_variables)
        g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))
        if epoch % 100 == 0:
            print(epoch, 'd-loss:', float(d_loss), 'g-loss:', float(g_loss),
                  'gp:', float(gp))
            # Sample a 10x10 grid of fake images and save it.
            z = tf.random.uniform([100, z_dim])
            fake_image = generator(z, training=False)
            img_path = os.path.join('images', 'wgan-%d.png'%epoch)
            save_result(fake_image.numpy(), 10, img_path, color_mode='P')


if __name__ == '__main__':
    main()