用tf2.0训练Alexnet模型(花分类)的过程

前言

最近在学 TensorFlow 2.1。之前学的是 PyTorch,但很多时候仍然需要用到 TensorFlow,因此现在开始学习 2.1 版本。作为新手,我会记录每一次写的代码;这次训练过程中遇到了很多问题,特此记录。

"""GPU训练"""
from myAlextNet_Module import AlexNet
import tensorflow as tf
import json
import os
import time
import glob
import random
import matplotlib.pylab as plt

# Enumerate GPUs by PCI bus id and make only device 0 visible to this process.
os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Turn on memory growth for every visible GPU.  Iterating an empty list is a
# no-op, so no separate "are there any GPUs" check is needed.
gpus = tf.config.experimental.list_physical_devices("GPU")
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # set_memory_growth failed: report the error and abort with status -1.
    # SystemExit is raised directly because the bare exit() helper is only
    # installed by the `site` module for interactive use.
    print(e)
    raise SystemExit(-1)
# Dataset layout: <cwd>/flower_data/{train,val}/<class_name>/<image>.jpg
root = os.path.abspath(os.getcwd())  # absolute path of the working directory

image_path = root + '/' + 'flower_data/'   # dataset root (keeps its trailing '/')
train_dir = image_path + 'train'
validation_dir = image_path + 'val'

# Directory that receives the saved weights.  exist_ok=True replaces the
# separate exists() check, which could race with another process creating it.
os.makedirs("save_weights", exist_ok=True)

# Hyperparameters
im_height = 224   # input image height in pixels
im_width = 224    # input image width in pixels
batch_size = 64
epoch = 10        # number of training epochs

# Build the label mappings from the class sub-directories of train_dir.
# os.listdir() returns entries in arbitrary order and also lists plain files,
# so keep directories only and sort them: the class indices are then
# reproducible between runs and class_num counts real classes only.
data_class = sorted(
    cla for cla in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, cla))
)
class_num = len(data_class)
class_dir = {cla: idx for idx, cla in enumerate(data_class)}        # class name -> index
invs_class_dir = {idx: cla for idx, cla in enumerate(data_class)}   # index -> class name

# Persist the index -> class-name mapping so it can be read back at inference.
# json.dumps turns the integer keys into strings ("0", "1", ...).
json_str = json.dumps(invs_class_dir, indent=4)
with open("class_indices.json", "w") as json_file:
    json_file.write(json_str)

# Training images: every *.jpg one directory level below train_dir, shuffled.
train_image_list = glob.glob(train_dir + '/*/*.jpg')
random.shuffle(train_image_list)
train_num = len(train_image_list)
# Training labels: the parent directory name of each image is its class name.
train_label_list = [class_dir[path.split(os.path.sep)[-2]] for path in train_image_list]

# Validation images, collected the same way from validation_dir.
val_image_list = glob.glob(validation_dir + '/*/*.jpg')
random.shuffle(val_image_list)
# Must be the size of the *validation* list: it sizes the validation shuffle
# buffer and determines validation_steps in model.fit().
val_num = len(val_image_list)
# Validation labels, derived from the parent directory name as above.
val_label_list = [class_dir[path.split(os.path.sep)[-2]] for path in val_image_list]

def process_path(img_path, label):
    """Load one JPEG file and turn it into a (image, one-hot label) pair.

    Args:
        img_path: path of a JPEG file.
        label: integer class index of that image.

    Returns:
        A tuple ``(image, label)``: ``image`` is a float32 tensor resized to
        ``(im_height, im_width)`` with 3 channels and values in [0, 1];
        ``label`` is one-hot encoded with depth ``class_num``.
    """
    label = tf.one_hot(label, depth=class_num)
    image = tf.io.read_file(img_path)
    # Always decode to 3 channels.  With channels=0 the decoder keeps the
    # channel count stored in the file, so a grayscale JPEG would come out as
    # (H, W, 1) and could not be batched with RGB images.
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)  # uint8 -> [0, 1]
    image = tf.image.resize(image, (im_height, im_width))
    return image, label


AUTOTUNE = tf.data.experimental.AUTOTUNE


def _build_dataset(image_paths, labels, shuffle_buffer):
    """Return a shuffled, decoded, endlessly repeating, batched dataset.

    Args:
        image_paths: list of image file paths.
        labels: list of integer class indices, aligned with ``image_paths``.
        shuffle_buffer: buffer size handed to ``Dataset.shuffle``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    dataset = dataset.shuffle(buffer_size=shuffle_buffer)
    # Decode and resize images in parallel.
    dataset = dataset.map(process_path, num_parallel_calls=AUTOTUNE)
    # repeat() makes the stream endless; model.fit() bounds each epoch with
    # steps_per_epoch / validation_steps.
    dataset = dataset.repeat().batch(batch_size)
    # Prepare the next batches while the current one is being consumed.
    return dataset.prefetch(AUTOTUNE)


train_dataset = _build_dataset(train_image_list, train_label_list, train_num)
val_dataset = _build_dataset(val_image_list, val_label_list, val_num)

# Instantiate the model.  The output size follows the number of classes found
# in the dataset (the same value used as the one-hot depth of the labels), and
# the input size follows the im_height / im_width hyperparameters.
model = AlexNet(class_num=class_num)
# A subclassed model creates its weights only once it is built.
model.build((batch_size, im_height, im_width, 3))
model.summary()

# Train through the Keras high-level API.
# from_logits=False assumes AlexNet already ends in a softmax
# (TODO confirm in myAlextNet_Module).
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'])

def scheduler(epoch):
    """Return the learning rate for the given epoch index.

    Epochs below 10 get a constant 0.01; from epoch 10 onwards the rate is
    0.001 * exp(0.1 * (10 - epoch)).

    NOTE(review): this function is not passed to any callback in this script,
    so it currently has no effect on training.
    """
    switch_epoch = 10
    if epoch < switch_epoch:
        return 0.01
    decay_factor = tf.math.exp(0.1 * (switch_epoch - epoch))
    return 0.001 * decay_factor

# Save weights only, and only when the monitored val_accuracy improves.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='./save_weights/myAlex.h5',
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=True,
)
callbacks = [checkpoint_callback]

# Both datasets repeat forever, so each epoch is bounded by explicit step counts.
train_steps = train_num // batch_size
val_steps = val_num // batch_size

history = model.fit(
    x=train_dataset,
    steps_per_epoch=train_steps,
    epochs=epoch,
    validation_data=val_dataset,
    validation_steps=val_steps,
    callbacks=callbacks,
)

# Plot the loss and accuracy curves recorded by model.fit().
history_dict = history.history
train_loss, train_accuracy, val_loss, val_accuracy = (
    history_dict[key] for key in ('loss', 'accuracy', 'val_loss', 'val_accuracy')
)

# One figure per metric: (y-axis label, ((values, legend label), ...)).
epoch_axis = range(epoch)
for y_label, curves in (
    ('loss', ((train_loss, 'train_loss'), (val_loss, 'val_loss'))),
    ('accuracy', ((train_accuracy, 'accuracy'), (val_accuracy, 'val_accuracy'))),
):
    plt.figure()
    for values, curve_label in curves:
        plt.plot(epoch_axis, values, label=curve_label)
    plt.legend()    # legend entries come from the label= arguments
    plt.xlabel('epochs')
    plt.ylabel(y_label)

plt.show()

预测过程:这里要注意,如果采用子类化(subclassed)模型,实例化之后一定要先调用一次 build()。只有 build 之后模型才会真正创建各层的权重变量,此时才算真正完成实例化,之后才能用 load_weights 加载权重文件。

"""预测predict"""
from myAlextNet_Module import AlexNet
from PIL import Image
import numpy as np
import json
import matplotlib.pyplot as plt
import os
import tensorflow as tf
im_height = 224
im_width = 224
batch_size = 16

# Load the image and force 3-channel RGB, so grayscale, RGBA or palette files
# still produce an (H, W, 3) array.
img = Image.open('./159079265_d77a9ac920_n.jpg').convert('RGB')
# Resize to the network input size (PIL expects (width, height)).
img = img.resize((im_width, im_height))
plt.imshow(img)

# Scale pixel values to [0, 1].
img = np.array(img) / 255.

# Add a leading batch axis: the image becomes a batch with one member.
img = np.expand_dims(img, 0)


# Load the index -> class-name dictionary from the JSON file.
# The with-block closes the file even when parsing fails.
try:
    with open('./class_indices.json', 'r') as json_file:
        class_indict = json.load(json_file)
except (OSError, ValueError) as e:
    # OSError: file missing or unreadable; ValueError: malformed JSON
    # (json.JSONDecodeError is a ValueError subclass).
    print(e)
    raise SystemExit(-1)

# Create the model.  The number of outputs is taken from the class dictionary
# loaded above, so it matches the JSON file instead of a hard-coded 5.
model = AlexNet(class_num=len(class_indict))
# A subclassed model has no weights until it is built; build it first so that
# load_weights() has variables to fill.
model.build((batch_size, im_height, im_width, 3))

# Load the trained weights.
model.load_weights("./save_weights/myAlex.h5")

# Predict, drop the batch axis and pick the highest-scoring class.
predict = np.squeeze(model.predict(img))
predict_class = np.argmax(predict)
# The JSON keys are strings, hence str(); print the class name and its score.
print(class_indict[str(predict_class)], predict[predict_class])
plt.show()


Note:
在数据读取过程中,采用的是 tf.data.Dataset,而不是 Keras 下的 ImageDataGenerator。前一种方法速度更快,因为它可以通过 map(num_parallel_calls=...) 并行地读取和预处理数据。
在网络训练的耗时上,CPU 负责图像读取以及预处理,GPU 负责对网络进行训练。Keras 的数据生成器默认以单线程读取数据,而 GPU 的计算很快,于是大部分时间都花在等待数据读取上,大大降低了 GPU 的利用率。如果使用 tf.data.Dataset,它能够让 CPU 多线程去读取数据,这样缩短了读取时间,而不耽误 GPU 的利用率。

你可能感兴趣的:(用tf2.0训练Alexnet模型(花分类)的过程)