This is just a quick comparison; the results are shown below:
| Method | Time per epoch |
| --- | --- |
| tf.function | 1.5030 s |
| fit | 1.9616 s |
| cpu | 2.1381 s |
Different hardware and configurations will of course produce different numbers; this is only a rough comparison.
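The full benchmark script is below. It trains the same autoencoder on MNIST three ways: a custom training loop whose train step is wrapped in `@tf.function`, the built-in `fit` on the default device (a GPU in my case), and `fit` pinned to the CPU, averaging the wall-clock time over 10 epochs.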
```python
import os
import time

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential, layers

tf.random.set_seed(22)
np.random.seed(22)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.')

# Hyperparameters
h_dim = 20     # compress the original 784 dimensions down to 20
batchsz = 100  # batch size
epochs = 10

# Load the dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype(np.float32) / 255.
x_test = x_test.astype(np.float32) / 255.
x_train = x_train.reshape([-1, 784])
x_test = x_test.reshape([-1, 784])

# Unsupervised learning: the target is the input itself
TRAIN_BUF = 60000
TEST_BUF = 10000
train_dataset = tf.data.Dataset.from_tensor_slices(x_train).shuffle(TRAIN_BUF).batch(batchsz)
test_dataset = tf.data.Dataset.from_tensor_slices(x_test).shuffle(TEST_BUF).batch(batchsz)

# Build the model
class AE(keras.Model):
    # 1. Initialization
    def __init__(self):
        super(AE, self).__init__()  # call the parent constructor
        # Encoder network
        self.encoder = Sequential([
            layers.Dense(256, activation=keras.activations.relu),
            # layers.Dense(128, activation=keras.activations.relu),
            layers.Dense(h_dim)
        ])
        # Decoder network
        self.decoder = Sequential([
            # layers.Dense(128, activation=keras.activations.relu),
            layers.Dense(256, activation=keras.activations.relu),
            layers.Dense(784)
        ])

    # 2. Forward pass
    def call(self, inputs, training=None):
        # [b, 784] ==> [b, h_dim]
        h = self.encoder(inputs)
        # [b, h_dim] ==> [b, 784]
        x_hat = self.decoder(h)
        return x_hat

# One training step, traced into a graph by tf.function
@tf.function
def train_ae(x):
    with tf.GradientTape() as tape:
        x_rec_logits = model(x)
        # Treat every pixel as a binary classification problem
        rec_loss = tf.losses.binary_crossentropy(x, x_rec_logits, from_logits=True)
        # rec_loss = tf.losses.MSE(x, x_rec_logits)
        rec_loss = tf.reduce_mean(rec_loss)
    grads = tape.gradient(rec_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return rec_loss

model = AE()
model.build(input_shape=(None, 784))
model.summary()
optimizer = keras.optimizers.SGD(learning_rate=2.0, momentum=0.5)

print('Starting training')
time_function = []
for epoch in range(epochs):
    start_time = time.time()
    for train_x in train_dataset:
        loss = train_ae(train_x)
    elapse_time = time.time() - start_time
    time_function.append(elapse_time)
    print(f'epoch {epoch + 1}, time: {elapse_time:0.3f}s, loss: {loss:0.4f}')
print(f'tf.function average time per epoch: {sum(time_function) / epochs:.4f}')
# Same model trained with the built-in fit (default device; a GPU if available).
# Note: each model gets its own optimizer instance rather than reusing the one
# above, since an optimizer's state is tied to the first model's variables.
model_fit = AE()
model_fit.compile(optimizer=keras.optimizers.SGD(learning_rate=2.0, momentum=0.5), loss='mse')
start_time = time.time()
hist = model_fit.fit(x_train, x_train, epochs=epochs, shuffle=True, batch_size=batchsz)
elapse_time = time.time() - start_time
print(f'fit average time per epoch: {elapse_time / epochs:.4f}')

# Same model trained with fit, pinned to the CPU
with tf.device('/cpu:0'):
    model_cpu = AE()
    model_cpu.compile(optimizer=keras.optimizers.SGD(learning_rate=2.0, momentum=0.5), loss='mse')
    start_time = time.time()
    hist = model_cpu.fit(x_train, x_train, epochs=epochs, shuffle=True, batch_size=batchsz)
    elapse_time = time.time() - start_time
    print(f'cpu average time per epoch: {elapse_time / epochs:.4f}')
```
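Much of the `@tf.function` advantage comes from tracing: on the first call the decorated Python function is compiled into a static graph, and later calls replay that graph without per-op Python overhead. That is also why the first timed epoch in the loop above is usually a bit slower than the rest. A minimal standalone sketch of the effect (the function `square` and the tensor shapes are just illustrative, not part of the benchmark):

```python
import time
import tensorflow as tf

@tf.function
def square(x):
    return tf.matmul(x, x)  # any non-trivial op will do

x = tf.random.normal([1000, 1000])

t0 = time.time()
square(x)  # first call: traces the Python function into a graph
print(f'first call (includes tracing): {time.time() - t0:.4f}s')

t0 = time.time()
square(x)  # subsequent calls replay the cached graph
print(f'second call (graph replay): {time.time() - t0:.4f}s')
```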