VGG16 Model Explained and Code Implementation

Contents

I. The VGG Network Model

II. Code Reproduction

1. Building the Network

2. Dataset Creation (pkl)

3. Source Code


I. The VGG Network Model

        AlexNet was the pioneering work in convolutional neural networks, but it suffered from large convolution kernels, large strides, and no padding; the VGG network, proposed in 2014, was designed to address these issues. The VGG architecture is also simple, built from basic convolution and pooling layers, which makes it well suited as an introduction to CNNs: convolution and pooling progressively shrink the spatial size of the image while raising the channel dimension to extract features.
        VGG's main contribution is a thorough evaluation of network depth, using small 3×3 convolution filters to extract features, which addresses AlexNet's tendency to overlook fine-grained features. VGG13 and VGG16 are the variants in common use today; the example in this article uses VGG16. The VGG configurations are shown below:

[Figure 1: table of VGG configurations (columns A-E), covering VGG11, VGG13, VGG16, and VGG19]

        As the figure shows, six VGG configurations are listed, including VGG11, VGG13, VGG16, and VGG19. Take VGG16: the name refers to its 16 weight layers, namely 13 convolution layers (2+2+3+3+3) plus 3 fully connected layers; note that pooling layers such as maxpool and avgpool are not counted. This article uses the VGG16 configuration in column D of the figure above.
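        Why 3×3 filters? Two stacked 3×3 convolutions cover the same 5×5 receptive field as a single 5×5 convolution, but with fewer weights and an extra ReLU in between. A minimal sketch (added here for illustration, not from the original post) verifying the parameter counts with Keras:

import tensorflow as tf
from tensorflow.keras import layers, Sequential

C = 64  # number of input/output channels
one_5x5 = Sequential([layers.Conv2D(C, 5, padding="same", input_shape=(64, 64, C))])
two_3x3 = Sequential([
    layers.Conv2D(C, 3, padding="same", input_shape=(64, 64, C)),
    layers.Conv2D(C, 3, padding="same"),
])
print(one_5x5.count_params())  # 5*5*64*64 + 64     = 102464
print(two_3x3.count_params())  # 2*(3*3*64*64 + 64) =  73856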

II. Code Reproduction

1. Building the Network

        Using VGG16 (column D of the figure above) as the template, the training code train.py is listed below; the model itself is built in the network = Sequential block. Two points differ from the original paper: the input here is 64×64 RGB images rather than 224×224, and the classifier head is two small Dense layers plus a single sigmoid unit for binary classification instead of the 1000-way softmax.

import tensorflow as tf
import numpy as np
import os
import cv2
from tensorflow.keras import layers, optimizers, datasets, Sequential
import sys
from six.moves import cPickle

# Use the GPU if available, with memory growth enabled
gpu = tf.config.experimental.list_physical_devices('GPU')
if len(gpu) > 0:
    tf.config.experimental.set_memory_growth(gpu[0], True)

def main():

    ###### Step 1: load the train/test data
    #  batchsize is the number of samples used per training step. For example, with
    #  10000 samples total, split into 8000 training and 2000 validation images,
    #  Train_batchsize = 80 means one full pass over the training data takes
    #  8000/80 = 100 steps (i.e. 100 iterations per epoch).
    #  epoch is the number of passes over the whole dataset: with epoch = 100,
    #  training runs epoch * (8000/Train_batchsize) = 10000 iterations in total.
    Train_batchsize = 80
    val_batchsize = 50
    
    # Load the training and validation sets:
    # x -> training images    x_val -> validation images
    # y -> training labels    y_val -> validation labels
    (x, y), (x_val, y_val) = Cap_load_data()
    
    # Squeeze the label dimension: [n, 1] => [n]
    y = tf.squeeze(y, axis=1)  # [n, 1] => [n]
    y_val = tf.squeeze(y_val, axis=1)  # [n, 1] => [n]
    
    # Shuffle the data and build batched dataset pipelines
    train_db = tf.data.Dataset.from_tensor_slices((x, y))
    train_db = train_db.shuffle(1000).map(preprocess).batch(Train_batchsize).repeat()
    val_db = tf.data.Dataset.from_tensor_slices((x_val, y_val))
    val_db = val_db.shuffle(500).map(preprocess).batch(val_batchsize).repeat()

    # Verify that the data stored in the pkl matches the actual images
    # print(x.shape,y.shape,x_val.shape,y_val.shape)
    # print(x[0,0,0,:])
    #
    # src = cv2.imread(r"XXX.jpg")
    # src = cv2.resize(src, (64, 64))
    # src = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)  # cv2.imread returns BGR by default
    # print(src[0,0,:])

    ###### Step 2: build the model
    # The conv layers extract features
    # The maxpool layers condense the features and halve the spatial size
    # If only two conv2d layers are kept here, the network fails to converge
    network = Sequential([    # input: 64x64x3
        layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D([2, 2]),  # 64x64 -> 32x32

        layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D([2, 2]),  # 32x32 -> 16x16
        # layers.MaxPool2D([2, 2]),

        layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D([2, 2]),  # 16x16 -> 8x8

        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D([2, 2]),  # 8x8 -> 4x4

        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D([2, 2]),  # 4x4 -> 2x2

        # Flatten the 2x2x512 feature map for the fully connected layers
        # layers.Reshape((-1, 512), input_shape=(-1, 1, 1, 512)),  # a Reshape layer would also work here
        layers.Flatten(),
        layers.Dense(256, activation=tf.nn.relu),
        layers.Dense(128, activation=tf.nn.relu),
        layers.Dense(1, activation=tf.nn.sigmoid),  # binary classification head
    ])

    network.build(input_shape=[None, 64, 64, 3])
    network.summary()

    ##### Step 3: configure the training setup
    # Train directly with the high-level Keras API
    # network.compile(
    #    optimizer=optimizers.Adam(lr=1e-4),
    #    loss=tf.losses.categorical_crossentropy, # MSE is an object, CategoricalCrossentropy is a class
    #    metrics=['accuracy']
    # )
    # network.compile(optimizer='adam',
    #                 loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    #                 metrics=['accuracy'])
    network.compile(optimizer='rmsprop',
                  loss=tf.losses.binary_crossentropy,
                  metrics=['accuracy'])

    ##### Step 4: train
    network.fit(train_db, epochs=20, verbose=2, steps_per_epoch=x.shape[0]//Train_batchsize, validation_steps=x_val.shape[0]//val_batchsize, validation_data=val_db, validation_freq=1)
    # fit's verbose argument controls the logging mode (verbose=2 prints one line per epoch)

    network.save('XXX.h5')


def preprocess(x, y):   # scale images to float32 in [0, 1] and cast labels to int32
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int32)
    return x, y

def Cap_load_data():
    # path = r'XXXXX'
    path = r'XXXXXX' # Data1 adds more small-capacitor images on top of Data
    fpath = os.path.join(path, 'train.pkl')
    x_train, y_train = load_batch(fpath)
    fpath = os.path.join(path, 'test.pkl')
    x_val, y_val = load_batch(fpath)

    y_train = np.reshape(y_train, (len(y_train), 1))  # labels as [n, 1]
    y_val = np.reshape(y_val, (len(y_val), 1))

    # if K.image_data_format() == 'channels_last':
    #     x_train = x_train.transpose(0, 2, 3, 1)
    #     x_val = x_val.transpose(0, 2, 3, 1)

    return (x_train, y_train), (x_val, y_val)

def load_batch(fpath):  # read the pickle file with cPickle
    # with open(fpath, 'rb') as f:
    #     if sys.version_info < (3,):
    #       d = cPickle.load(f)
    #     else:
    #       d = cPickle.load(f, encoding='bytes')
    #       # decode utf8
    #       d_decoded = {}
    #       for k, v in d.items():
    #         d_decoded[k.decode('utf8')] = v
    #       d = d_decoded
    with open(fpath, 'rb') as f:
        d = cPickle.load(f, encoding='bytes')
    data = d['data']
    labels = d['labels']
    data = data.reshape(data.shape[0], 64, 64, 3)
    return data, labels


if __name__ == "__main__":
    main()
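        Once training finishes, the saved model can be loaded back for inference. Below is a minimal sketch (not part of the original post; 'XXX.h5' and 'example.jpg' are placeholder paths) that classifies a single image using the same preprocessing as training (BGR to RGB, resize to 64x64, scale to [0, 1]):

import cv2
import numpy as np
import tensorflow as tf

model = tf.keras.models.load_model('XXX.h5')     # model saved by train.py
src = cv2.imread('example.jpg')                  # placeholder test image
src = cv2.cvtColor(cv2.resize(src, (64, 64)), cv2.COLOR_BGR2RGB)
x = src.astype(np.float32)[None, ...] / 255.     # shape [1, 64, 64, 3]
prob = float(model.predict(x)[0, 0])             # sigmoid output in [0, 1]
print('class 1' if prob > 0.5 else 'class 0', prob)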

2. Dataset Creation (pkl)

        The script below packs the resized training images and their labels into a single pkl file as a CIFAR-style {'data', 'labels'} dictionary, which train.py then reads via load_batch.

import tensorflow as tf
import numpy as np
import cv2
import os
import sys
from six.moves import cPickle

def main():
    pklPath = r"XXX.pkl"
    input_size = (64, 64)
    data_oneDim = input_size[0] * input_size[1] * 3
    # total number of training images
    num_trainImage = 4000
    data = np.zeros((num_trainImage, data_oneDim), dtype=np.uint8)  # one flattened image per row
    # This dataset is a binary classification task with a 50/50 class split, so each class gets num_trainImage/2 labels
    label1 = [0 for x in range(int(num_trainImage/2))]
    label2 = [1 for x in range(int(num_trainImage/2))]
    label = np.array(label1 + label2)


    # 2000 images per class, each 64*64 with 3 channels
    # 64*64*3 = 12288, giving a 4000*12288 numpy uint8 array
    ImageFile1 = r"XXX" + "\\"   # folder of class-0 (label 0) images
    ImageNames1 = os.listdir(ImageFile1)
    i = 0
    for Name1 in ImageNames1:
        imagePath = ImageFile1 + Name1
        src = cv2.imread(imagePath)
        src = cv2.resize(src, input_size)
        src = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)  # cv2.imread returns BGR by default
        # src_data = np.array([src[:,:,0],src[:,:,1],src[:,:,2]])
        src_data = np.array(src)
        src_data = src_data.reshape(data_oneDim)
        data[i] = src_data
        i = i + 1
        # print(src[0,:,0])     # check that src_data's first-row R channel matches src after conversion
        # print(src_data[:64])
        # print(src.shape)
        # print(src_data.shape)

    ImageFile2 = r"XXX" + "\\"   # folder of class-1 (label 1) images
    ImageNames2 = os.listdir(ImageFile2)
    for Name2 in ImageNames2:
        imagePath = ImageFile2 + Name2
        src = cv2.imread(imagePath)
        src = cv2.resize(src, input_size)
        src = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)  # cv2.imread returns BGR by default
        # src_data = np.array([src[:,:,0],src[:,:,1],src[:,:,2]])
        src_data = np.array(src)
        src_data = src_data.reshape(data_oneDim)
        data[i] = src_data
        i = i + 1

    Cap_dict = {'data': data, 'labels': label}
    print(Cap_dict['data'].shape)

    with open(pklPath,'wb') as f:
        cPickle.dump(Cap_dict,f)

if __name__ == "__main__":
    main()
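        A quick way to sanity-check the generated file is to load it back and confirm the shapes and one decoded image, in the same spirit as the commented-out verification code in train.py (the pkl path is a placeholder):

from six.moves import cPickle

with open(r"XXX.pkl", 'rb') as f:
    d = cPickle.load(f, encoding='bytes')
print(d['data'].shape, d['labels'].shape)   # expect (4000, 12288) and (4000,)
img0 = d['data'][0].reshape(64, 64, 3)      # first image restored to HxWxC
print(img0.shape, img0.dtype)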

3. Source Code

https://github.com/mcuwangzaiacm/VGG16_tensorflow2.0
        
