Training ResNet50 on the CIFAR-10 dataset: training accuracy is high, but validation accuracy is only about 75%. Any advice is appreciated!

This is my first time running an experiment on my own machine. The code comes from the book "TensorFlow2.0卷积神经网络实战" (TensorFlow 2.0 CNNs in practice); after some debugging it runs without errors, but the results are not good, so I would appreciate any pointers.
The code follows below.
Code for loading the dataset:

import os
import pickle
import numpy as np
from PIL import Image
import tensorflow as tf

def get_cifar10_train_data_and_label(root=""):
    print("-------------------------------------------------------------")
    print("Loading the training data and labels")
    def load_file(filename):
        # Each CIFAR-10 batch file is a pickled dict with keys
        # "data" (N x 3072 uint8) and "labels" (list of N ints).
        with open(filename, 'rb') as fo:
            data = pickle.load(fo, encoding='latin1')
        return data
    data_batch_1 = load_file(os.path.join(root, 'data_batch_1'))
    data_batch_2 = load_file(os.path.join(root, 'data_batch_2'))
    data_batch_3 = load_file(os.path.join(root, 'data_batch_3'))
    data_batch_4 = load_file(os.path.join(root, 'data_batch_4'))
    data_batch_5 = load_file(os.path.join(root, 'data_batch_5'))
    dataset = []
    labelset = []
    for data in [data_batch_1, data_batch_2, data_batch_3, data_batch_4, data_batch_5]:
        img_data = data["data"]
        img_label = data["labels"]
        for i in img_data:
            # Each row is a flat 3072-byte image stored channel-first;
            # reshape to (1, 3, 32, 32) and move the channels last (NHWC).
            i = i.reshape(1, 3, 32, 32)
            i = tf.transpose(i, [0, 2, 3, 1])
            dataset.append(i)
        labelset.append(img_label)
    dataset = np.concatenate(dataset)
    labelset = np.concatenate(labelset)
    print("-------------------------------------------------------------")
    return dataset, labelset

def get_cifar10_test_data_and_label(root=""):
    print("-------------------------------------------------------------")
    print("Loading the test data and labels")
    def load_file(filename):
        with open(filename, 'rb') as fo:
            data = pickle.load(fo, encoding='latin1')
        return data
    data_batch_1 = load_file(os.path.join(root, 'test_batch'))
    dataset = []
    labelset = []
    for data in [data_batch_1]:
        img_data = data["data"]
        img_label = data["labels"]
        for i in img_data:
            # Same NCHW -> NHWC conversion as in the training loader.
            i = i.reshape(1, 3, 32, 32)
            i = tf.transpose(i, [0, 2, 3, 1])
            dataset.append(i)
        labelset.append(img_label)

    dataset = np.concatenate(dataset)
    labelset = np.concatenate(labelset)
    print("-------------------------------------------------------------")
    return dataset, labelset




def get_CIFAR100_dataset(root=""):
    # Note: despite the CIFAR100 in the name, this loads CIFAR-10.
    train_dataset, label_dataset = get_cifar10_train_data_and_label(root=root)
    print(train_dataset.shape)
    test_dataset, test_label_dataset = get_cifar10_test_data_and_label(root=root)
    return train_dataset, label_dataset, test_dataset, test_label_dataset

    
if __name__ == "__main__":
    get_CIFAR100_dataset(root="C:/Users/thinkpad/SpyderProjects/ResnetDemo/cifar-10-python/cifar-10-batches-py/")
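
As a side note (this is my own sketch, not the book's code): the per-image loop above converts one row at a time with tf.transpose, which is slow. The same NCHW-to-NHWC conversion can be done for a whole batch file with one vectorized numpy call; a hypothetical drop-in helper would look roughly like this:

import numpy as np

def batch_to_images(batch):
    # Hypothetical helper: batch["data"] is an (N, 3072) uint8 array where each
    # row stores a 3x32x32 image channel-first; reshape and move channels last.
    return batch["data"].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)  # (N, 32, 32, 3)

# Inside the loaders above, the inner loop could then be replaced by:
#     dataset.append(batch_to_images(data))
#     labelset.append(data["labels"])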
   

Below is the model definition code:

import tensorflow as tf

def identity_block(input_tensor, out_dim):
    # Bottleneck-style block: 1x1 reduce -> 3x3 -> 1x1 expand,
    # with a residual connection around it.
    conv1 = tf.keras.layers.Conv2D(out_dim // 4, kernel_size=1, padding="SAME", activation=tf.nn.relu)(input_tensor)
    conv2 = tf.keras.layers.BatchNormalization()(conv1)
    conv3 = tf.keras.layers.Conv2D(out_dim // 4, kernel_size=3, padding="SAME", activation=tf.nn.relu)(conv2)
    conv4 = tf.keras.layers.BatchNormalization()(conv3)
    conv5 = tf.keras.layers.Conv2D(out_dim, kernel_size=1, padding="SAME")(conv4)
    # Residual connection
    out = tf.keras.layers.Add()([input_tensor, conv5])
    out = tf.nn.relu(out)
    return out

def resnet_Model():
    input_xs = tf.keras.Input(shape=[32, 32, 3])  # 32x32 RGB input
    conv_1 = tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding="SAME", activation=tf.nn.relu)(input_xs)
    """-------- Stage 1 ----------"""
    out_dim = 32
    identity_1 = tf.keras.layers.Conv2D(filters=out_dim, kernel_size=3, padding="SAME", activation=tf.nn.relu)(conv_1)
    identity_1 = tf.keras.layers.BatchNormalization()(identity_1)
    for _ in range(3):
        identity_1 = identity_block(identity_1, out_dim)
    """-------- Stage 2 ----------"""
    out_dim = 64
    identity_2 = tf.keras.layers.Conv2D(filters=out_dim, kernel_size=3, padding="SAME", activation=tf.nn.relu)(identity_1)
    identity_2 = tf.keras.layers.BatchNormalization()(identity_2)
    for _ in range(4):
        identity_2 = identity_block(identity_2, out_dim)
    """-------- Stage 3 ----------"""
    out_dim = 128
    identity_3 = tf.keras.layers.Conv2D(filters=out_dim, kernel_size=3, padding="SAME", activation=tf.nn.relu)(identity_2)
    identity_3 = tf.keras.layers.BatchNormalization()(identity_3)
    for _ in range(6):
        identity_3 = identity_block(identity_3, out_dim)
    """-------- Stage 4 ----------"""
    out_dim = 256
    identity_4 = tf.keras.layers.Conv2D(filters=out_dim, kernel_size=3, padding="SAME", activation=tf.nn.relu)(identity_3)
    identity_4 = tf.keras.layers.BatchNormalization()(identity_4)
    for _ in range(3):
        identity_4 = identity_block(identity_4, out_dim)
    flat = tf.keras.layers.Flatten()(identity_4)
    flat = tf.keras.layers.Dropout(0.217)(flat)
    dense = tf.keras.layers.Dense(1024, activation=tf.nn.relu)(flat)
    dense = tf.keras.layers.BatchNormalization()(dense)
    logits = tf.keras.layers.Dense(10, activation=tf.nn.softmax)(dense)
    model = tf.keras.Model(inputs=input_xs, outputs=logits)
    return model

if __name__ == "__main__":
    resnet_model = resnet_Model()
    print(resnet_model.summary())
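
For reference, here is what a standard ResNet bottleneck block would look like (my own sketch, not the book's code or the paper's exact implementation). The block above differs from it in several ways: the ReLU is fused into the Conv2D before the BatchNormalization, there is no projection shortcut, and the network never downsamples between stages.

import tensorflow as tf

def bottleneck_block(x, out_dim, stride=1):
    # Sketch of a canonical ResNet bottleneck: Conv -> BN -> ReLU ordering,
    # 1x1 reduce, 3x3, 1x1 expand, plus a projection shortcut whenever the
    # spatial size or channel count changes.
    shortcut = x
    y = tf.keras.layers.Conv2D(out_dim // 4, 1, strides=stride, padding="SAME")(x)
    y = tf.keras.layers.BatchNormalization()(y)
    y = tf.keras.layers.ReLU()(y)
    y = tf.keras.layers.Conv2D(out_dim // 4, 3, padding="SAME")(y)
    y = tf.keras.layers.BatchNormalization()(y)
    y = tf.keras.layers.ReLU()(y)
    y = tf.keras.layers.Conv2D(out_dim, 1, padding="SAME")(y)
    y = tf.keras.layers.BatchNormalization()(y)
    if stride != 1 or x.shape[-1] != out_dim:
        # Projection shortcut: match shape before the Add.
        shortcut = tf.keras.layers.Conv2D(out_dim, 1, strides=stride, padding="SAME")(x)
        shortcut = tf.keras.layers.BatchNormalization()(shortcut)
    return tf.keras.layers.ReLU()(tf.keras.layers.Add()([y, shortcut]))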

Below is the training code:

import tensorflow as tf
import numpy as np

import getDataset
import ResnetModel

batch_size = 64
study_rate = 1e-2  # learning rate for Adam

x_train, y_train, x_test, y_test = getDataset.get_CIFAR100_dataset(
    root="C:/Users/thinkpad/SpyderProjects/ResnetDemo/cifar-10-python/cifar-10-batches-py/")
y_train = np.float32(tf.keras.utils.to_categorical(y_train, num_classes=10))
y_test = np.float32(tf.keras.utils.to_categorical(y_test, num_classes=10))
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(batch_size * 10).batch(batch_size).repeat(3)

model = ResnetModel.resnet_Model()
model.compile(optimizer=tf.optimizers.Adam(study_rate),
              loss=tf.losses.categorical_crossentropy,
              metrics=['accuracy'])
model.fit(train_data, epochs=10)
score = model.evaluate(x_test, y_test)
print("last score:", score)

Training and test results:
Epoch 100/100
782/782 [==============================] - 136s 173ms/step - loss: 0.0035 - accuracy: 0.9989
313/313 [==============================] - 8s 24ms/step - loss: 2.0742 - accuracy: 0.7598
last score: [2.074207067489624, 0.7598000168800354]

When evaluating on the test set, the accuracy is only about 75%, while the ResNet paper reports an error rate of only around 6% on CIFAR-10. That is a large gap, and I don't know what causes it.
Some people say the test set is too small, but it has 10,000 samples.
The original paper did not use dropout, whereas my code does, and it also has quite a few BN layers.
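
One thing I am considering trying (my own sketch, not from the book): the pipeline above never normalizes the input (the pixels stay in the 0-255 range) and does no data augmentation, while the original paper's CIFAR-10 setup uses both. A minimal sketch of adding them before building train_data, reusing the variables from the training script above, would be something like this; the Adam learning rate of 1e-2 may also be worth lowering:

# Sketch: scale pixels to [0, 1] and add the simple augmentation (horizontal
# flip plus 4-pixel pad-and-random-crop) commonly used for CIFAR-10.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

def augment(image, label):
    image = tf.image.random_flip_left_right(image)
    image = tf.image.resize_with_crop_or_pad(image, 40, 40)  # pad to 40x40
    image = tf.image.random_crop(image, size=[32, 32, 3])    # random 32x32 crop
    return image, label

train_data = (tf.data.Dataset.from_tensor_slices((x_train, y_train))
              .map(augment)
              .shuffle(10000)
              .batch(batch_size))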
