TensorFlow: time needed to train an autoencoder with tf.function, fit, and on the CPU

This is just a simple comparison; the results are as follows:

Method         Time
tf.function    1.5030 s/epoch
fit            1.9616 s/epoch
CPU            2.1381 s/epoch

Different hardware and software configurations will give different numbers; this is only a rough comparison.
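If you want to record the configuration a run was made on, a minimal sketch using standard TensorFlow APIs looks like this:

import tensorflow as tf
print(tf.__version__)                      # TensorFlow version used for the run
print(tf.config.list_physical_devices())   # CPUs/GPUs visible to TensorFlow

The full script used for the comparison follows.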

import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential, layers
import time

tf.random.set_seed(22)
np.random.seed(22)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.')

# Hyperparameters
h_dim = 20         # compress the original 784 dimensions down to 20
batchsz = 100      # batch size
# Load the dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype(np.float32) / 255.
x_test = x_test.astype(np.float32) / 255.
x_train = x_train.reshape([-1, 784])   # flatten 28x28 images into 784-dim vectors
x_test = x_test.reshape([-1, 784])
# Unsupervised learning: the target is the input itself, so no labels are needed
TRAIN_BUF = 60000
TEST_BUF = 10000
train_dataset = tf.data.Dataset.from_tensor_slices(x_train).shuffle(TRAIN_BUF).batch(batchsz)
test_dataset = tf.data.Dataset.from_tensor_slices(x_test).shuffle(TEST_BUF).batch(batchsz)
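# The shuffle buffers cover the full train/test sets, so each epoch is a uniform shuffle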

# Build the model
class AE(keras.Model):
    # 1. Initialization
    def __init__(self):
        super(AE, self).__init__()   # call the parent class constructor

        # Encoder network
        self.encoder = Sequential([
            layers.Dense(256, activation=keras.activations.relu),
            #layers.Dense(128, activation=keras.activations.relu),
            layers.Dense(h_dim)

        ])

        # Decoder network
        self.decoder = Sequential([
            #layers.Dense(128, activation=keras.activations.relu),
            layers.Dense(256, activation=keras.activations.relu),
            layers.Dense(784)

        ])

    # 2. Forward pass
    def call(self, inputs, training=None):
        # [b, 784] ==> [b, 20]
        h = self.encoder(inputs)
        # [b, 20] ==> [b, 784]
        x_hat = self.decoder(h)

        return x_hat


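# @tf.function traces train_ae into a TensorFlow graph on the first call and
# reuses the compiled graph on later calls, avoiding per-step Python overhead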
@tf.function
def train_ae(x):
    with tf.GradientTape() as tape:
        x_rec_logits = model(x)
        # treat each pixel as a binary classification problem
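        # from_logits=True: the decoder's final Dense layer has no sigmoid, so it outputs raw logits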
        rec_loss = tf.losses.binary_crossentropy(x, x_rec_logits, from_logits=True)
        # rec_loss = tf.losses.MSE(x, x_rec_logits)
        rec_loss = tf.reduce_mean(rec_loss)

    grads = tape.gradient(rec_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return rec_loss

# Create the model, build it so summary() can report shapes, and pick an optimizer
model = AE()
model.build(input_shape=(None, 784))
model.summary()
optimizer = keras.optimizers.SGD(learning_rate=2.0, momentum=0.5)

print("开始训练")
epochs = 10
time_function=[]

for epoch in range(epochs):
    start_time = time.time()
    for train_x in train_dataset:
        loss = train_ae(train_x)
    elapse_time = time.time()-start_time
    time_function.append(elapse_time)
    print(f'Epoch {epoch+1}: time {elapse_time:0.3f}s, loss {loss:0.4f}')

print(f'tf.function: average time per epoch: {sum(time_function)/epochs:.4f}s')

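# The same architecture trained with the built-in fit loop; a fresh optimizer is
# created so that state from the first run does not carry over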
model_fit = AE()
model_fit.compile(optimizer=keras.optimizers.SGD(learning_rate=2.0, momentum=0.5), loss='mse')
start_time = time.time()
hist = model_fit.fit(x_train, x_train, epochs=epochs, shuffle=True, batch_size=batchsz)
elapse_time = time.time() - start_time
print(f'fit: average time per epoch: {elapse_time/epochs:.4f}s')

# The same comparison, forced onto the CPU
with tf.device('/cpu:0'):
    model_cpu = AE()
    model_cpu.compile(optimizer=keras.optimizers.SGD(learning_rate=2.0, momentum=0.5), loss='mse')
    start_time = time.time()
    hist = model_cpu.fit(x_train, x_train, epochs=epochs, shuffle=True, batch_size=batchsz)
    elapse_time = time.time() - start_time
    print(f'CPU: average time per epoch: {elapse_time / epochs:.4f}s')
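One caveat: fit optimizes MSE here while the tf.function loop optimizes binary cross-entropy, so the reported loss values are not directly comparable; only the wall-clock times are. The first epoch of the tf.function run also includes the one-time graph-tracing overhead, which slightly inflates its average.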
