chainer - Image Classification - Refactoring the VGG Code [source code included]

Table of Contents

  • Preface
  • I. The VGG Network Structure
  • II. Code Implementation
    • 1. Importing the required libraries
    • 2. Building the model
      • Version 1 (official)
      • Version 2 (modified with reference to PyTorch)
  • III. Calling the Model


Preface

  This post refactors the VGG model with chainer so that the code is easier to read.
  The model is invoked in exactly the same way as the models in the previous posts.


I. The VGG Network Structure

  Let's start with the configuration table straight from the VGG paper:
(figure: the VGG configuration table from the paper)
  This implementation covers configurations A, B, D and E, which correspond to vgg11, vgg13, vgg16 and vgg19 respectively. Compare the table with the following diagram to make sense of it:
(figure: layer-by-layer diagram of configuration D)
  Reading this diagram against configuration D: the white blocks are convolution + activation, the red blocks are pooling layers, and the blue blocks at the end perform the flatten step, i.e. the feature maps are spread out into a single vector; the final output then goes through a softmax, which almost every classification network uses at its head. The code below therefore builds the network block by block.
  As for the theory, including how a convolution is computed, there is already plenty of material online, so I won't repeat it here; just follow how the height, width and channel count change and work through the numbers.
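  As a quick sanity check, here is a small sketch (my own addition, not part of the refactored code) that traces how the spatial size changes through the five stages of configuration D (VGG16) for a 224×224 input; every 3×3 convolution uses padding 1 and keeps the spatial size, and every 2×2 max pooling halves it:

import math

# (output channels, number of convs) for each of the five stages of VGG16
stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]
h = w = 224
print("input: 3 x {} x {}".format(h, w))
for i, (c, n) in enumerate(stages, 1):
    # the convs keep H and W; the stage-ending 2x2 max pooling halves them
    h, w = math.ceil(h / 2), math.ceil(w / 2)
    print("after stage {} ({} convs): {} x {} x {}".format(i, n, c, h, w))
# final feature map: 512 x 7 x 7, flattened to 25088 features for the FC layers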

II. Code Implementation

1. Importing the required libraries

import numpy as np
import chainer,math
import chainer.functions as F
import chainer.links as L
# MaxPooling2D here comes from the author's own nets package; the equivalent
# FunctionNode also lives in chainer.functions.pooling.max_pooling_2d
from nets.core.pooling.max_pooling_2d import MaxPooling2D
from chainer.functions.activation.relu import ReLU
from chainer.functions.noise.dropout import Dropout
from chainer.links.normalization.batch_normalization import BatchNormalization

2. Building the model

Version 1 (official)

The VGGBlock implementation

class VGGBlock(chainer.Chain):
    def __init__(self, n_channels, n_convs=2, in_channels=3, out_size=224,
                 num_classes=None, initialW=chainer.initializers.HeNormal()):
        super(VGGBlock, self).__init__()
        # n_convs may be 1-4 so that every configuration in cfgs (vgg11/13/16/19) can be built
        self.n_convs = n_convs
        self.out_size = out_size
        with self.init_scope():
            # the first conv maps in_channels -> n_channels, the remaining convs keep n_channels
            self.conv1 = L.Convolution2D(in_channels, n_channels, 3, 1, 1, initialW=initialW)
            for i in range(2, n_convs + 1):
                setattr(self, 'conv{0}'.format(i),
                        L.Convolution2D(n_channels, n_channels, 3, 1, 1, initialW=initialW))
            # each block ends with a 2x2 max pooling, which halves the spatial size
            self.out_size = math.ceil(self.out_size / 2)
            if num_classes:
                # only the last block carries the fully connected classifier head
                self.fc4 = L.Linear(n_channels * self.out_size * self.out_size, 4096, initialW=initialW)
                self.fc5 = L.Linear(4096, 4096, initialW=initialW)
                self.fc6 = L.Linear(4096, num_classes, initialW=initialW)
            else:
                self.fc4, self.fc5, self.fc6 = None, None, None

    def __call__(self, x):
        h = x
        for i in range(1, self.n_convs + 1):
            h = F.relu(getattr(self, 'conv{0}'.format(i))(h))
        h = F.max_pooling_2d(h, 2, 2)
        if self.fc4 is not None:
            h = F.dropout(F.relu(self.fc4(h)))
            h = F.dropout(F.relu(self.fc5(h)))
            h = self.fc6(h)
        return h
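A quick shape check (my own sketch, not in the original post, assuming the VGGBlock class above): a feature-only block keeps the 4-D layout, changes the channel count, halves the spatial size, and reports the new size through out_size so the next block can be sized correctly.

import numpy as np

x = np.random.rand(2, 3, 224, 224).astype(np.float32)
# a pure feature block: 3 -> 64 channels, 224 -> 112 spatial size, no classifier head
block = VGGBlock(64, n_convs=2, in_channels=3, out_size=224, num_classes=None)
h = block(x)
print(h.shape)         # (2, 64, 112, 112)
print(block.out_size)  # 112, handed to the next block as its out_size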

The main VGG body

class VGG_Simple(chainer.ChainList):
    cfgs = {
        'vgg11': [(64,1), (128,1), (256, 2), (512, 2), (512, 2)],
        'vgg13': [(64,2), (128,2), (256, 2), (512, 2), (512, 2)],
        'vgg16': [(64,2), (128,2), (256, 3), (512, 3), (512, 3)],
        'vgg19': [(64,2), (128,2), (256, 4), (512, 4), (512, 4)],
    }
    def __init__(self, num_classes=1000, model_name='vgg16', channels=3,
                 image_size=224, initialW=chainer.initializers.HeNormal()):
        super(VGG_Simple, self).__init__()

        cfg = self.cfgs[model_name]
        in_channels = channels
        out_size = image_size
        for i, (n_channels, n_convs) in enumerate(cfg):
            # only the last block receives the fully connected classifier head
            block = VGGBlock(n_channels, n_convs,
                             in_channels=in_channels, out_size=out_size,
                             num_classes=num_classes if i == len(cfg) - 1 else None,
                             initialW=initialW)
            self.add_link(block)
            in_channels = n_channels
            out_size = block.out_size

    def __call__(self, x):
        for f in self.children():
            x = f(x)
        if chainer.config.train:
            return x
        return F.softmax(x)
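As a reference for how the blocks chain together, here is a small check of my own (assuming the classes above): each block reports half the previous spatial size, so for a 224×224 input the last block sees 7×7 feature maps and its fc4 layer takes 512 * 7 * 7 = 25088 input features.

model = VGG_Simple(num_classes=10, model_name='vgg16', channels=3, image_size=224)
# each VGGBlock halves out_size: 224 -> 112 -> 56 -> 28 -> 14 -> 7
print([block.out_size for block in model.children()])  # [112, 56, 28, 14, 7]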

Version 2 (modified with reference to PyTorch)

class VGG_Complex(chainer.Chain):
    cfgs = {
        'vgg11': [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
        'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
        'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
        'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
    }

    def make_features(self, model_name, in_channels, image_size, initialW):
        # Build the convolutional part as a list of (name, layer) pairs.
        # Names starting with '_' mark stateless operations (pooling / ReLU)
        # that must not be registered as child links.
        layers = []
        conv_layers = 1      # stage index, advanced at every max pooling
        conv_layers_2 = 1    # conv index within the current stage
        out_size = image_size

        for v in self.cfgs[model_name]:
            if v == "M":
                layers += [("_MaxPooling_{0}".format(conv_layers), MaxPooling2D(2, 2))]
                out_size = math.ceil(out_size / 2)   # pooling halves the spatial size
                conv_layers += 1
                conv_layers_2 = 1
            else:
                conv2d = L.Convolution2D(in_channels=in_channels, out_channels=v, ksize=3,
                                         pad=1, initialW=initialW, nobias=True)
                layers += [("Conv{0}_{1}".format(conv_layers, conv_layers_2), conv2d),
                           ("BN{0}_{1}".format(conv_layers, conv_layers_2), BatchNormalization(v)),
                           ("_Relu{0}_{1}".format(conv_layers, conv_layers_2), ReLU())
                           ]
                in_channels = v
                conv_layers_2 += 1
        return layers, in_channels, out_size
    
    def __init__(self, num_classes=1000, model_name='vgg19',channels=3,image_size=224,initialW=chainer.initializers.Normal(0.01)):
        super(VGG_Complex, self).__init__()
        self.features,output_channels,output_size = self.make_features(model_name,channels,image_size,initialW)
        
        self.classifier = [
                ("fc1",L.Linear(output_channels*output_size*output_size, 4096)),
                ("_fc1_relu",ReLU()),
                ("_dropout1",Dropout(0.5)),
                ("fc2",L.Linear(4096, 4096)),
                ("_fc2_relu",ReLU()),
                ("_dropout2",Dropout(0.5)),
                ("output_1",L.Linear(4096, num_classes))
        ]
        
        with self.init_scope():
            # register only the parameterised layers (names without a leading '_') as child links;
            # the pooling, ReLU and Dropout entries hold no parameters
            for n in self.features:
                if not n[0].startswith('_'):
                    setattr(self, n[0], n[1])
            for n in self.classifier:
                if not n[0].startswith('_'):
                    setattr(self, n[0], n[1])

    def __call__(self, x):
        for n, f in self.features:
            if not n.startswith('_'):
                x = getattr(self, n)(x)
            else:
                x = f.apply((x,))[0]
        for n, f in self.classifier:
            if not n.startswith('_'):
                x = getattr(self, n)(x)
            elif isinstance(f, Dropout) and not chainer.config.train:
                # the Dropout FunctionNode does not check the train flag itself,
                # so skip it at inference time
                continue
            else:
                x = f.apply((x,))[0]

        if chainer.config.train:
            return x
        return F.softmax(x)
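To see what make_features generates, here is a short inspection sketch of my own (assuming the VGG_Complex class above); for 'vgg11' the generated layer names look like this:

model = VGG_Complex(num_classes=10, model_name='vgg11', channels=3, image_size=224)
print([name for name, _ in model.features][:6])
# ['Conv1_1', 'BN1_1', '_Relu1_1', '_MaxPooling_1', 'Conv2_1', 'BN2_1']
print([name for name, _ in model.classifier])
# ['fc1', '_fc1_relu', '_dropout1', 'fc2', '_fc2_relu', '_dropout2', 'output_1']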

III. Calling the Model

import numpy as np
from nets.VGG import VGG_Simple,VGG_Complex
import chainer
import chainer.functions as F
import chainer.links as L

if __name__ == '__main__':
    batch_size = 4
    n_channels = 3
    image_size = 224
    num_classes = 10
    
    model_simple = VGG_Simple(num_classes=num_classes, channels=n_channels,image_size=image_size)
    model_complex = VGG_Complex(num_classes=num_classes, channels=n_channels,image_size=image_size)
    print(model_simple.count_params())
    print(model_complex.count_params())
    
    x = np.random.rand(batch_size, n_channels, image_size, image_size).astype(np.float32)
    t = np.random.randint(0, num_classes, size=(batch_size,)).astype(np.int32)
    with chainer.using_config('train', True):
        y1 = model_simple(x)
        y2 = model_complex(x)
    loss1 = F.softmax_cross_entropy(y1, t)
    loss2 = F.softmax_cross_entropy(y2, t)
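
The snippet above stops at computing the losses; a minimal sketch of one parameter update (my own addition, using Chainer's standard optimizer API) could look like this:

from chainer import optimizers

optimizer = optimizers.MomentumSGD(lr=0.01)
optimizer.setup(model_simple)

model_simple.cleargrads()  # clear any stale gradients
loss1.backward()           # backprop through the softmax cross entropy
optimizer.update()         # apply the SGD-with-momentum update
print(loss1.array)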

Parameter counts:
(screenshot: the parameter counts printed by the two count_params() calls)
