Tensorflow2深度学习模型训练

导入库包

import numpy as np
import random
import cv2
import os
from tqdm import tqdm
import tensorflow as tf #rensorflow2
from tensorflow.keras import layers
tf.compat.v1.disable_eager_execution()

定义模型和损失函数

模型和损失函数按需编写

 def model(Input):
     x1 = tf.cast(Input, tf.float16)
     """Layer 0: Just a conventional Conv2D layer"""
     # 16个核,尺寸为5*5,步长为1,保持大小,ReLU激活
     conv1 = layers.Conv2D(filters=16, kernel_size=5, strides=1, padding='same', activation='relu', name='conv1')(x1)
     """Layer N: Model Layer"""
     pred=...
     return pred
def loss_function(y_true, y_pred):
    """Sum-of-squared-errors loss between ground truth and prediction."""
    residual = y_true - y_pred
    return tf.reduce_sum(tf.square(residual))

分配数据集为训练和测试两部分

path:存放影像/标签的文件夹,例如:\dataset\train\label或\dataset\train\image,文件夹下为规则裁剪后的测试用例,如*\google_dataset\train\label\0.png。path不区分train和valid,而是通过测试比例划分。
test_train:测试数据量占总数据量的比例,例如,有100组数据,test_train为0.25即75组为训练集、25组为测试集,如果自己划分好了就不用这个了。
image_train、image_test:训练集和测试集的文件名称列表,例如:[‘0.tif’,‘1.tif’,…]
作者为节省内存只存文件名,在加载数据的时候再拼接文件夹路径;更稳妥的做法是把文件全路径按行写入txt或者csv,再按行读取。如果训练中途(未完成全部epoch)可能中断,需要把划分好的列表保存到txt或者csv,省得重新训练的时候用了不一样的数据划分。

def get_set(path, test_train=0.25):
    """Randomly split the file names under *path* into train/test lists.

    Parameters
    ----------
    path : str
        Directory holding the sample files; only file names are returned.
    test_train : float, optional
        Fraction of files assigned to the test set (default 0.25).

    Returns
    -------
    (list, list)
        (train names, test names) — disjoint, together covering all files.
    """
    image_all = os.listdir(path)  # listdir already returns a fresh list
    random.shuffle(image_all)
    # Slice once instead of the original append-loop plus set-XOR, which
    # also made the train list's order non-deterministic across runs.
    n_test = int(len(image_all) * test_train)
    image_test = image_all[:n_test]
    image_train = image_all[n_test:]
    return image_train, image_test

读取数据

问:上面为什么没有直接把数据集读完做成Dataset,而是只拿了名字呢?
答:因为电脑内存不足啊,只能选择在每个batch开始的时候拿到文件名,然后读取文件作为输入。
实现:以batch_size将image_train进行划分,每次只读取image_train[idx:idx+batch_size]的文件名,load_data之后就可以作为模型的输入了。

def load_data(input_list):
    """Load one batch of images and their matching labels by file name.

    Parameters
    ----------
    input_list : list of str
        File names (no directory part) expected to exist in both the
        'image' and 'label' subfolders of the global DATASET_PATH.

    Returns
    -------
    (np.ndarray, np.ndarray)
        Stacked image batch and label batch, in input_list order.
    """
    # Let os.path.join insert the separator instead of concatenating the
    # backslash string '\image', which is Windows-only and an invalid
    # escape sequence in Python source (SyntaxWarning on 3.12+).
    img = np.array([_load_data(os.path.join(DATASET_PATH, 'image', name))
                    for name in input_list])
    label = np.array([_load_data(os.path.join(DATASET_PATH, 'label', name))
                      for name in input_list])
    return img, label
def _load_data(path):
    """Read a single image; binarize 2-D (single-channel) label masks.

    Parameters
    ----------
    path : str
        Full path of the image file.

    Returns
    -------
    np.ndarray
        (H, W, C) image unchanged, or (H, W, 1) int 0/1 mask for 2-D input.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file (cv2.imread returns None).
    """
    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread silently returns None on failure; fail loudly here
        # instead of crashing later on img.ndim with an opaque AttributeError.
        raise FileNotFoundError(f"cannot read image: {path}")
    if img.ndim == 2:
        # Single-channel label mask: binarize (>0 -> 1) and append a channel
        # axis so it matches the (H, W, 1) label placeholder.
        # np.int was removed in NumPy 1.24 — use the builtin int dtype.
        img = np.expand_dims((img > 0).astype(int), axis=-1)
    return img

定义训练和评估

# ---- Build the static graph (eager execution is disabled above) ----
# Assumes BATCH_SZ, LEARNING_RATE, MAX_EPOCH and DATASET_PATH are defined
# earlier in the file — TODO confirm.
input_shape = [256, 256, 3]
# Fixed-shape placeholders: every batch fed must contain exactly BATCH_SZ
# samples (see the floor division when computing iterations below).
Input = tf.compat.v1.placeholder(
    shape=[BATCH_SZ, input_shape[0], input_shape[1], input_shape[2]],
    dtype=tf.float32, name='Input')
label = tf.compat.v1.placeholder(
    shape=[BATCH_SZ, input_shape[0], input_shape[1], 1],
    dtype=tf.float32, name='label')
pred = model(Input)
tf.compat.v1.add_to_collection('predictions', pred)
loss = loss_function(label, pred)
opt = tf.compat.v1.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(loss)
# ---- Session config: grow GPU memory on demand, cap at 60% of the card ----
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.6
sess = tf.compat.v1.Session(config=config)
saver = tf.compat.v1.train.Saver(max_to_keep=5, var_list=tf.compat.v1.global_variables())
sess.run(tf.compat.v1.global_variables_initializer())
# ---- Train / validate ----
# NOTE(review): was DATASET_PATH + '\input', but load_data() reads file names
# from the 'image' subfolder — the listing must come from the same directory.
train_set, val_set = get_set(os.path.join(DATASET_PATH, 'image'), test_train=0.2)
valid_sz = len(val_set)
train_sz = len(train_set)
print("the number of train data is", train_sz)
print("the number of val data is", valid_sz)
best_loss = float('inf')  # best validation loss seen so far
# np.int was removed in NumPy 1.24 — use plain Python ints.
# Floor division: the placeholders have a fixed batch dimension, so a partial
# final batch cannot be fed; drop it instead of crashing on a shape mismatch
# (the original used ceil, which would feed a short last batch).
iter_train_epoch = train_sz // BATCH_SZ
iter_val_epoch = valid_sz // BATCH_SZ
with tf.device('/gpu:0'):
    for epoch in range(MAX_EPOCH):
        # Reset accumulators each epoch (the original initialized a variable
        # named 'total_loss' but accumulated into 'total_train_loss', and
        # never initialized 'total_valid_loss' at all -> NameError).
        total_train_loss = 0.0
        total_valid_loss = 0.0
        random.shuffle(train_set)
        print(f"epoch {epoch+1}:")
        # -- training: one optimizer step per batch --
        for _iter in tqdm(range(iter_train_epoch), desc='training', position=0):
            start_idx = _iter * BATCH_SZ
            end_idx = start_idx + BATCH_SZ
            input_t = train_set[start_idx:end_idx]
            batch_train_input, batch_train_label = load_data(input_t)
            _, t_loss, t_out = sess.run(
                [opt, loss, pred],
                feed_dict={Input: batch_train_input,
                           label: batch_train_label})
            total_train_loss += t_loss
        ave_train_loss = total_train_loss / iter_train_epoch
        # -- validation: forward pass only, no optimizer op --
        # (the original iterated over 'iter_valid_epoch', which was never
        # defined — the variable is 'iter_val_epoch')
        for _iter in tqdm(range(iter_val_epoch), desc='validation', position=0):
            start_idx = _iter * BATCH_SZ
            end_idx = start_idx + BATCH_SZ
            input_v = val_set[start_idx:end_idx]
            batch_valid_input, batch_valid_label = load_data(input_v)
            v_loss, v_out = sess.run(
                [loss, pred],
                feed_dict={Input: batch_valid_input,
                           label: batch_valid_label})
            total_valid_loss += v_loss
        ave_valid_loss = total_valid_loss / iter_val_epoch
        print(f"train loss is {ave_train_loss},validation loss is {ave_valid_loss}")
        # Checkpoint the best model — 'best_loss' and 'saver' were created
        # above but never used in the original.
        if ave_valid_loss < best_loss:
            best_loss = ave_valid_loss
            saver.save(sess, 'model.ckpt')

你可能感兴趣的:(Python,深度学习,深度学习,tensorflow,python)