目录
一. VGG 网络模型
二. 代码复现
1. 网络搭建
2.数据集制作(pkl)
3.源码地址
AlexNet是卷积神经网络的开山之作,但是由于其卷积核太大、移动步长大、无填充,容易丢失细节特征,所以14年提出的VGG网络解决了这一问题。而且VGG网络结构简单,由基础的卷积和池化操作组成,非常适合用作CNN的入门学习。通过卷积、池化不断将图片的尺寸减小、维度升高,从而提取特征。
VGG的主要贡献是全面评估网络的深度,使用3*3卷积滤波器来提取特征,解决了AlexNet容易忽略小部分特征的问题。目前常用的有VGG13、VGG16网络,本文实例部分以VGG16为例。VGG模型如下:
如图所示,一共列出了VGG11、VGG13、VGG16、VGG19等6种VGG模型配置,以VGG16为例,因为其具有16个带权重的网络层(13个卷积层和3个全连接层)。需要注意的是,这16层中不包括pooling(池化层),如maxpool、avgpool等。本文以上图中的VGG16(第D列)为例进行讲解。
以上图中的(D列)VGG16为例, 构建模型, train.py训练代码如下,其中模型搭建如network = Sequential所示。
import tensorflow as tf
import numpy as np
import os
import cv2
from tensorflow.keras import layers, optimizers, datasets, Sequential
import sys
from six.moves import cPickle
# Use the GPU if one is present: enable memory growth so TensorFlow
# allocates VRAM on demand instead of grabbing the whole device up front.
gpu = tf.config.experimental.list_physical_devices('GPU')
if len(gpu) > 0:
    tf.config.experimental.set_memory_growth(gpu[0], True)
def main():
    """Train a VGG16 (configuration D) binary classifier on 64x64 RGB images.

    Loads the train/validation sets from pkl files via Cap_load_data(),
    builds tf.data pipelines, constructs the VGG16 network, trains it with
    binary crossentropy, and saves the resulting model to disk.
    """
    ###### Step 1: load the data
    # Batch size notes: with e.g. 8000 training images and Train_batchsize = 80,
    # one epoch takes 8000 / 80 = 100 steps; with epochs = 100 the whole run
    # would perform 100 * (8000 / 80) = 10000 iterations.
    Train_batchsize = 80
    val_batchsize = 50

    # x / y         -> training images / training labels
    # x_val / y_val -> validation images / validation labels
    (x, y), (x_val, y_val) = Cap_load_data()

    # Squeeze the label tensors: [n, 1] => [n]
    y = tf.squeeze(y, axis=1)
    y_val = tf.squeeze(y_val, axis=1)

    # Shuffle, preprocess, batch and repeat both datasets
    train_db = tf.data.Dataset.from_tensor_slices((x, y))
    train_db = train_db.shuffle(1000).map(preprocess).batch(Train_batchsize).repeat()
    val_db = tf.data.Dataset.from_tensor_slices((x_val, y_val))
    val_db = val_db.shuffle(500).map(preprocess).batch(val_batchsize).repeat()

    ###### Step 2: build the model
    # Stacked 3x3 convolutions extract features; each MaxPool layer halves
    # the spatial size (64 -> 32 -> 16 -> 8 -> 4 -> 2).
    # NOTE(original author): with only two conv stages the model failed to converge.
    network = Sequential([  # input 64x64
        layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D([2, 2]),
        layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D([2, 2]),  # 32x32
        layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D([2, 2]),
        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D([2, 2]),
        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D([2, 2]),
        # Flatten the feature map before the fully-connected head
        layers.Flatten(),
        layers.Dense(256, activation=tf.nn.relu),
        layers.Dense(128, activation=tf.nn.relu),
        # Single sigmoid unit -> binary classification
        layers.Dense(1, activation=tf.nn.sigmoid),
    ])
    network.build(input_shape=[None, 64, 64, 3])
    network.summary()

    ###### Step 3: configure training
    # Sigmoid output pairs with binary crossentropy loss.
    network.compile(optimizer='rmsprop',
                    loss=tf.losses.binary_crossentropy,
                    metrics=['accuracy'])

    ###### Step 4: train
    # verbose=2 -> one log line per epoch
    network.fit(train_db, epochs=20, verbose=2,
                steps_per_epoch=x.shape[0] // Train_batchsize,
                validation_steps=x_val.shape[0] // val_batchsize,
                validation_data=val_db, validation_freq=1)
    network.save('XXX.h5')
def preprocess(x, y):
    """Scale image data to float32 in [0, 1] and cast labels to int32."""
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int32)
    return x, y
def Cap_load_data():
    """Load the capacitor dataset from train.pkl / test.pkl under `path`.

    Returns:
        ((x_train, y_train), (x_val, y_val)) where the labels are reshaped
        to column vectors of shape (n, 1).
    """
    path = r'XXXXXX'  # Data1 adds more small-capacitor images on top of Data
    fpath = os.path.join(path, 'train.pkl')
    x_train, y_train = load_batch(fpath)
    fpath = os.path.join(path, 'test.pkl')
    x_val, y_val = load_batch(fpath)
    # Labels -> (n, 1) column vectors; main() squeezes them back to (n,)
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_val = np.reshape(y_val, (len(y_val), 1))
    return (x_train, y_train), (x_val, y_val)
def load_batch(fpath):
    """Read one pickled batch file and return (data, labels).

    The pkl file holds a dict with keys 'data' (an array of N flattened
    64*64*3 image rows) and 'labels'; rows are reshaped to channels-last
    (N, 64, 64, 3) RGB images.
    """
    # stdlib pickle: six.moves.cPickle is just an alias for it on Python 3
    import pickle
    with open(fpath, 'rb') as f:
        d = pickle.load(f, encoding='bytes')
    data = d['data']
    labels = d['labels']
    data = data.reshape(data.shape[0], 64, 64, 3)
    return data, labels
# Script entry point: run training only when executed directly.
if __name__ == "__main__":
    main()
import tensorflow as tf
import numpy as np
import cv2
import os
import sys
from six.moves import cPickle
def _fill_from_folder(folder, data, start_index, input_size, data_oneDim):
    """Load every image in `folder` into consecutive rows of `data`.

    Each image is resized to `input_size`, converted BGR->RGB (cv2.imread
    returns BGR), flattened to `data_oneDim` values, and stored starting at
    row `start_index`. Returns the next free row index.
    """
    i = start_index
    for name in os.listdir(folder):
        src = cv2.imread(folder + name)
        src = cv2.resize(src, input_size)
        src = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)  # cv2.imread is BGR by default
        data[i] = np.array(src).reshape(data_oneDim)
        i = i + 1
    return i


def main():
    """Build a pkl dataset file from two folders of images.

    The two folders hold the negative (label 0) and positive (label 1)
    classes, 50% of the images each. All images are flattened into a
    (num_trainImage, 64*64*3) uint8 array and pickled together with the
    label vector as {'data': ..., 'labels': ...}.
    """
    pklPath = r"XXX.pkl"
    input_size = (64, 64)
    data_oneDim = input_size[0] * input_size[1] * 3  # 64*64*3 = 12288
    # Total number of images in the set (2000 per class)
    num_trainImage = 4000
    # One flattened uint8 image per row: 4000 x 12288
    data = np.zeros((num_trainImage, data_oneDim), dtype=np.uint8)
    # Binary classification with a 50/50 split: first half 0, second half 1
    label1 = [0 for x in range(int(num_trainImage / 2))]
    label2 = [1 for x in range(int(num_trainImage / 2))]
    label = np.array(label1 + label2)
    # NOTE(review): assumes each folder contains exactly num_trainImage/2
    # images — extra files would overflow `data`; confirm against the dirs.
    ImageFile1 = r"XXX" + "\\"
    i = _fill_from_folder(ImageFile1, data, 0, input_size, data_oneDim)
    ImageFile2 = r"XXX" + "\\"
    i = _fill_from_folder(ImageFile2, data, i, input_size, data_oneDim)
    Cap_dict = {'data': data, 'labels': label}
    print(Cap_dict['data'].shape)
    with open(pklPath, 'wb') as f:
        cPickle.dump(Cap_dict, f)
# Script entry point: build the pkl dataset only when executed directly.
if __name__ == "__main__":
    main()
https://github.com/mcuwangzaiacm/VGG16_tensorflow2.0