最近在积攒粉丝500,大家帮帮忙,动动小手指关注、点赞、收藏…
VGG网络结构是牛津大学著名研究组VGG(Visual Geometry Group)在2014年提出的卷积神经网络结构。它受AlexNet网络启发,探索了卷积神经网络的深度与其性能之间的关系,优化方向属于“网络加深”。
VGG网络在ILSVRC-2014竞赛分类任务中获得第二名,其top-5错误率为7.5%左右,与冠军GoogLeNet相差不多。
论文地址:https://arxiv.org/abs/1409.1556
VGG网络主要有几点突出贡献:
VGG网络结构根据卷积核大小和卷积层数目的不同,设计了A,A-LRN,B,C,D,E共6种配置。
其中D(VGG16)和E(VGG19)两种网络结构较为常用。
网络6种结构比较:
对于给定的感受野(与输出有关的输入图片的局部大小),堆叠多个小卷积核优于使用单个大卷积核:多层非线性层增加了网络深度,能够学习更复杂的模式,而且代价更小(参数更少)。
VGG中采用连续的几个3x3的卷积核代替AlexNet等早期网络中的较大卷积核(11x11,7x7,5x5)。
具体做法:
例如:特征图28x28,步长1,填充0。经过一个5x5卷积后输出为24x24;经过两个连续的3x3卷积后输出依次为26x26、24x24。两者输出尺寸(感受野)相同,但按单通道计算,参数量由5x5=25降为2x3x3=18。
import tensorflow as tf
from tensorflow.keras.layers import Conv2D,MaxPool2D,Flatten,Dropout
from tensorflow.keras.layers import Dense,Softmax,BatchNormalization,Layer
class ConvBA(Layer):
    """Convolution followed by batch normalization.

    The optional activation is applied inside the convolution layer, so the
    effective order is Conv -> activation -> BatchNorm.

    Args:
        filters: Number of output channels of the convolution.
        kernel_size: Kernel size forwarded to ``Conv2D``.
        strides: Stride forwarded to ``Conv2D``.
        padding: Padding mode forwarded to ``Conv2D``.
        activation: Activation forwarded to ``Conv2D``; ``None`` means linear.
    """

    def __init__(self, filters, kernel_size, strides=1,
                 padding='valid', activation=None):
        super(ConvBA, self).__init__()
        self.conv1 = Conv2D(filters, kernel_size, strides=strides,
                            padding=padding, activation=activation)
        self.bn1 = BatchNormalization()

    def call(self, x, training=None):
        """Apply the convolution and then batch normalization to ``x``.

        Args:
            x: Input tensor accepted by ``Conv2D``.
            training: Training-mode flag handed to ``BatchNormalization``.
                ``None`` leaves the decision to Keras.

        Returns:
            The normalized output tensor.
        """
        x = self.conv1(x)
        # Forward the flag explicitly instead of relying on Keras to
        # propagate it implicitly from the enclosing model's call.
        x = self.bn1(x, training=training)
        return x
class VGG16(tf.keras.Model):
    """VGG-style image classifier with 13 convolution and 3 dense layers.

    Five convolution blocks (each closed by 2x2 max pooling) are followed by
    a dense classifier head whose width is reduced to 2048/1024 units to cut
    computation.

    Args:
        num_classes: Number of output classes.
    """

    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()
        # 1. Convolutional feature extractor.
        # block1
        self.conv11 = ConvBA(64, (3, 3), strides=1, padding="same", activation="relu")
        self.conv12 = ConvBA(64, (3, 3), strides=1, padding="same", activation="relu")
        self.pool11 = MaxPool2D(pool_size=(2, 2), strides=2, padding='valid')
        # block2
        self.conv21 = ConvBA(128, (3, 3), strides=1, padding="same", activation="relu")
        self.conv22 = ConvBA(128, (3, 3), strides=1, padding="same", activation="relu")
        self.pool21 = MaxPool2D(pool_size=(2, 2), strides=2, padding='valid')
        # block3
        # NOTE: the 1x1 kernel closing blocks 3-5 corresponds to
        # configuration C of the VGG paper; configuration D uses 3x3 here.
        self.conv31 = ConvBA(256, (3, 3), strides=1, padding="same", activation="relu")
        self.conv32 = ConvBA(256, (3, 3), strides=1, padding="same", activation="relu")
        self.conv33 = ConvBA(256, (1, 1), strides=1, padding="same", activation="relu")
        self.pool31 = MaxPool2D(pool_size=(2, 2), strides=2, padding='valid')
        # block4
        self.conv41 = ConvBA(512, (3, 3), strides=1, padding="same", activation="relu")
        self.conv42 = ConvBA(512, (3, 3), strides=1, padding="same", activation="relu")
        self.conv43 = ConvBA(512, (1, 1), strides=1, padding="same", activation="relu")
        self.pool41 = MaxPool2D(pool_size=(2, 2), strides=2, padding='valid')
        # block5
        self.conv51 = ConvBA(512, (3, 3), strides=1, padding="same", activation="relu")
        self.conv52 = ConvBA(512, (3, 3), strides=1, padding="same", activation="relu")
        self.conv53 = ConvBA(512, (1, 1), strides=1, padding="same", activation="relu")
        self.pool51 = MaxPool2D(pool_size=(2, 2), strides=2, padding='valid')
        # 2. Classifier head:
        #    dense -> dropout -> dense -> dropout -> dense -> softmax.
        #    The dense layers are narrower than in the paper to save compute.
        self.flatten = Flatten()
        self.fc0 = Dense(2048, activation="sigmoid", use_bias=False)
        self.drop1 = Dropout(0.5)
        self.fc1 = Dense(1024, activation="sigmoid", use_bias=False)
        self.drop2 = Dropout(0.5)
        # The last dense layer emits raw logits. Squashing them with a
        # sigmoid before the softmax would confine them to (0, 1) and cap
        # the confidence the softmax can express.
        self.fc2 = Dense(num_classes, use_bias=False)
        self.softmax = Softmax()

    def call(self, x, training=False):
        """Run the forward pass and return class probabilities.

        Args:
            x: Batch of images. The original network takes
                (batch, 224, 224, 3); the accompanying experiment feeds
                112x112 images.
            training: Whether dropout is active.

        Returns:
            Tensor with one probability per class for every sample.
        """
        x = self.pool11(self.conv12(self.conv11(x)))  # block1
        x = self.pool21(self.conv22(self.conv21(x)))  # block2
        x = self.pool31(self.conv33(self.conv32(self.conv31(x))))  # block3
        x = self.pool41(self.conv43(self.conv42(self.conv41(x))))  # block4
        x = self.pool51(self.conv53(self.conv52(self.conv51(x))))  # block5
        # Classifier head.
        x = self.flatten(x)
        x = self.fc0(x)  # 2048 units
        # Dropout decides for itself whether to drop units based on the flag.
        x = self.drop1(x, training=training)
        x = self.fc1(x)  # 1024 units
        x = self.drop2(x, training=training)
        x = self.fc2(x)  # one logit per class
        x = self.softmax(x)  # logits -> probabilities
        return x
训练集每个类别有3000张、验证集每类有600张;图片大致如下:
定义基本函数:
# myreaddata.py
import random
import pathlib
"""
将所有数据存放在同一目录下,
# 然后将不同类别的图片分别地存放在各自的类别子目录下
"""
def get_all_image_paths(image_dir):
    """Return the paths of all files stored one level below ``image_dir``.

    The expected layout is ``image_dir/<class name>/<image file>``.

    Args:
        image_dir: Root directory holding one sub-directory per class.

    Returns:
        Sorted list of path strings, e.g.
        ``['flower_photos/sunflowers/4895721242_89014e723c_n.jpg', ...]``
        (with the platform's own path separator).
    """
    data_path = pathlib.Path(image_dir)
    # ``glob`` also matches nested directories and yields entries in an
    # arbitrary order: keep regular files only and sort so that the result
    # is reproducible across runs and platforms.
    return sorted(str(p) for p in data_path.glob('*/*') if p.is_file())
def get_label_and_index(image_dir):
    """Collect the class names under ``image_dir`` and number them.

    Every sub-directory of ``image_dir`` is treated as one class. Example:

        ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']
        {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflowers': 3, 'tulips': 4}

    Args:
        image_dir: Root directory holding one sub-directory per class.

    Returns:
        Tuple ``(label_names, label_index)``: the sorted class names and a
        dict mapping each name to its position in that list.
    """
    root = pathlib.Path(image_dir)
    label_names = [entry.name for entry in root.glob('*/') if entry.is_dir()]
    label_names.sort()
    label_index = {label: position for position, label in enumerate(label_names)}
    return label_names, label_index
tensorflow2.x的格式读取数据集:
# data_manager.py
import tensorflow as tf
from data_process.myreaddata import *
def process_image(fpath, label, image_size=(112, 112), num_classes=2):
    """Load one JPEG file and one-hot encode its label.

    Args:
        fpath: Path of the image file.
        label: Integer class index of the image.
        image_size: Target ``(height, width)`` the image is resized to.
        num_classes: Length of the one-hot label vector.

    Returns:
        Tuple ``(image, label)``: the resized 3-channel image and the
        one-hot encoded label.
    """
    image = tf.io.read_file(fpath)  # raw file contents
    image = tf.image.decode_jpeg(image, channels=3)  # decode as 3-channel JPEG
    image = tf.image.resize(image, list(image_size))
    label = tf.one_hot(label, depth=num_classes)
    return image, label


def get_dataset(image_dir, is_shuffle=False, batch_size=64,
                image_size=(112, 112), num_classes=None):
    """Build a batched ``tf.data.Dataset`` from a class-per-folder directory.

    Args:
        image_dir: Root directory holding one sub-directory per class.
        is_shuffle: Whether to shuffle the samples.
        batch_size: Number of samples per batch.
        image_size: Target ``(height, width)`` every image is resized to.
        num_classes: Length of the one-hot labels. ``None`` uses the number
            of class directories found under ``image_dir``.

    Returns:
        Dataset yielding ``(images, one_hot_labels)`` batches.

    Raises:
        ValueError: If no image is found under ``image_dir``.
    """
    # Collect every image path.
    image_paths = get_all_image_paths(image_dir)
    if not image_paths:
        raise ValueError("no images found under {!r}".format(image_dir))
    _, label_index = get_label_and_index(image_dir)
    if num_classes is None:
        # Keep the one-hot depth in sync with the classes actually present
        # instead of silently producing all-zero labels for extra classes.
        num_classes = len(label_index)
    # Map every image path to the numeric label of its parent directory.
    image_labels = [label_index[pathlib.Path(path).parent.name] for path in image_paths]
    # Build the input pipeline.
    ds = tf.data.Dataset.from_tensor_slices((image_paths, image_labels))
    if is_shuffle:
        # Shuffle the (path, label) pairs before decoding so that the
        # shuffle buffer holds short strings rather than decoded images.
        ds = ds.shuffle(buffer_size=len(image_paths))
    ds = ds.map(
        lambda path, label: process_image(
            path, label, image_size=image_size, num_classes=num_classes))
    ds = ds.batch(batch_size)
    return ds
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import layers,losses, metrics
from data_process.data_manager import get_dataset
from network.vgg16 import VGG16
class TrainModel():
    """Train the two-class VGG16 model with a hand-written training loop.

    Args:
        lr: Learning rate of the SGD optimizer.
    """

    def __init__(self, lr=0.01):
        self.model = VGG16(num_classes=2)  # two-class network
        self.model.build(input_shape=(None, 112, 112, 3))  # BHWC
        self.model.summary()
        self.loss_fun = losses.CategoricalCrossentropy()  # cross-entropy loss
        self.opt = tf.optimizers.SGD(learning_rate=lr)  # stochastic gradient descent
        self.train_acc_metric = metrics.CategoricalAccuracy()  # running train accuracy
        self.val_acc_metric = metrics.CategoricalAccuracy()  # running validation accuracy

    def _train_step(self, inputs, labels):
        """Run one optimization step; return the batch loss and predictions."""
        with tf.GradientTape() as tape:  # record operations for autodiff
            y_pred = self.model(inputs, training=True)  # forward pass
            loss_val = self.loss_fun(labels, y_pred)
        grads = tape.gradient(loss_val, self.model.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.model.trainable_variables))
        self.train_acc_metric(labels, y_pred)  # accumulate epoch accuracy
        return loss_val, y_pred

    def _evaluate(self, dataset):
        """Return the accuracy over ``dataset`` as a float and reset the metric."""
        for inputs, labels in dataset:
            y_pred = self.model(inputs, training=False)
            self.val_acc_metric(labels, y_pred)
        val_acc = float(self.val_acc_metric.result())
        self.val_acc_metric.reset_states()
        return val_acc

    def _save_accuracy_plot(self, acc_history):
        """Plot validation accuracy per epoch and save it as a PNG file."""
        x = np.arange(1, len(acc_history) + 1, 1)
        y = np.array(acc_history)
        plt.figure()  # fresh figure so repeated runs do not overlay curves
        plt.plot(x, y)
        plt.xlabel("epoch")
        plt.ylabel("val_acc")
        plt.title('model acc in valid dataset')
        plt.savefig("./output/val_acc.png", format='png')
        plt.close()

    def train(self, fpath="./data/mycatdog2", epochs=300, m=5):
        """Train the network, then save the model and the accuracy curve.

        Args:
            fpath: Dataset root containing ``train`` and ``valid`` folders.
            epochs: Number of passes over the training set.
            m: Print training loss/accuracy every ``m`` batches; a falsy
                value (``0`` or ``None``) disables the per-batch output.

        Returns:
            List with the validation accuracy (float) of every epoch.
        """
        batch_size = 64
        test_acc_list = []
        # Load the datasets.
        train_dataset = get_dataset(os.path.join(fpath, "train"), is_shuffle=True, batch_size=batch_size)
        val_dataset = get_dataset(os.path.join(fpath, "valid"), is_shuffle=False, batch_size=batch_size)
        for epoch in range(epochs):
            print(" ** Start of epoch {} **".format(epoch))
            # Train on one batch at a time.
            for nbatch, (inputs, labels) in enumerate(train_dataset):
                loss_val, y_pred = self._train_step(inputs, labels)
                # ``m`` is checked first so that m == 0 cannot divide by zero.
                if m and nbatch % m == 0:
                    correct = tf.equal(tf.argmax(labels, 1), tf.argmax(y_pred, 1))
                    acc = tf.reduce_mean(tf.cast(correct, tf.float32))
                    print('{}-{} train_loss:{:.5f}, train_acc:{:.5f}'.format(
                        epoch, nbatch, float(loss_val), float(acc)))
            # Report and reset the accuracy accumulated over the epoch.
            train_acc = self.train_acc_metric.result()
            self.train_acc_metric.reset_states()
            print('Training acc over epoch: {}, acc:{:.5f}'.format(epoch, float(train_acc)))
            # Evaluate on the validation set after every epoch.
            val_acc = self._evaluate(val_dataset)
            print('Valid acc over epoch: {}, acc:{:.5f}'.format(epoch, val_acc))
            test_acc_list.append(val_acc)
        # Make sure the output directory exists even when this method is
        # called from another module rather than the script entry point.
        os.makedirs("./output", exist_ok=True)
        # Save the trained model. The directory name is kept unchanged so
        # existing consumers of this path keep working.
        tf.saved_model.save(self.model, "./output/mnist_model")
        # Plot validation accuracy against the epoch number and save it.
        self._save_accuracy_plot(test_acc_list)
        return test_acc_list
if __name__ == "__main__":
    # Create the output directory; ``exist_ok`` avoids the check-then-create race.
    path = "./output"
    os.makedirs(path, exist_ok=True)
    model = TrainModel()
    # Raw string: the backslashes of the Windows path are literal characters,
    # not (invalid) escape sequences. The resulting value is unchanged.
    model.train(fpath=r"F:\数据集\mycatdog2")
VGG16我训练了45个epoch,在训练集上acc达到0.998,在验证集上acc=0.88。
(相同的环境下,仅仅更换了网络:
AlexNet训练300个epoch,在训练集上acc达到1.00,在验证集上acc=0.85。
而VGG仅训练45个epoch,提升了3个百分点。
也许是我的训练集数据太少,精度提升上不明显)
若是对大家有帮助,不要忘了关注、点赞、收藏哦…