I recently need to implement MnasNet on PaddlePaddle, so it helps to first understand how the major frameworks implement networks. ResNet-50 is a classic image-classification network with implementations in every major framework.
Let's start with the Caffe prototxt implementation. It runs to roughly 2,300 lines; it is tedious to write and easy to get wrong, so I won't paste it here.
The structure can be visualized with Netscope.
There is also a Python version, which is much more concise:
import caffe
from caffe import layers as L
from caffe import params as P


def conv_bn_scale_relu(bottom, num_output=64, kernel_size=3, stride=1, pad=0):
    """Conv -> BatchNorm -> Scale -> ReLU, with BN/Scale/ReLU applied in place on the conv blob."""
    conv = L.Convolution(bottom, num_output=num_output, kernel_size=kernel_size, stride=stride, pad=pad,
                         param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                         weight_filler=dict(type='xavier', std=0.01),
                         bias_filler=dict(type='constant', value=0))
    conv_bn = L.BatchNorm(conv, use_global_stats=False, in_place=True)
    conv_scale = L.Scale(conv, scale_param=dict(bias_term=True), in_place=True)
    conv_relu = L.ReLU(conv, in_place=True)
    return conv, conv_bn, conv_scale, conv_relu


def conv_bn_scale(bottom, num_output=64, kernel_size=3, stride=1, pad=0):
    """Conv -> BatchNorm -> Scale without ReLU; used for the last conv of each branch."""
    conv = L.Convolution(bottom, num_output=num_output, kernel_size=kernel_size, stride=stride, pad=pad,
                         param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                         weight_filler=dict(type='xavier', std=0.01),
                         bias_filler=dict(type='constant', value=0.2))
    conv_bn = L.BatchNorm(conv, use_global_stats=False, in_place=True)
    conv_scale = L.Scale(conv, scale_param=dict(bias_term=True), in_place=True)
    return conv, conv_bn, conv_scale


def eltwise_relu(bottom1, bottom2):
    """Element-wise sum of the two branches followed by ReLU."""
    residual_eltwise = L.Eltwise(bottom1, bottom2, eltwise_param=dict(operation=1))
    residual_eltwise_relu = L.ReLU(residual_eltwise, in_place=True)
    return residual_eltwise, residual_eltwise_relu
def residual_branch(bottom, base_output=64):
    """Bottleneck block with an identity shortcut.

    input:  4*base_output x n x n
    output: 4*base_output x n x n
    :param base_output: base num_output of branch2
    :param bottom: bottom layer
    :return: layers
    """
    branch2a, branch2a_bn, branch2a_scale, branch2a_relu = \
        conv_bn_scale_relu(bottom, num_output=base_output, kernel_size=1)  # base_output x n x n
    branch2b, branch2b_bn, branch2b_scale, branch2b_relu = \
        conv_bn_scale_relu(branch2a, num_output=base_output, kernel_size=3, pad=1)  # base_output x n x n
    branch2c, branch2c_bn, branch2c_scale = \
        conv_bn_scale(branch2b, num_output=4 * base_output, kernel_size=1)  # 4*base_output x n x n
    residual, residual_relu = \
        eltwise_relu(bottom, branch2c)  # 4*base_output x n x n
    return branch2a, branch2a_bn, branch2a_scale, branch2a_relu, branch2b, branch2b_bn, branch2b_scale, branch2b_relu, \
        branch2c, branch2c_bn, branch2c_scale, residual, residual_relu
def residual_branch_shortcut(bottom, stride=2, base_output=64):
    """Bottleneck block with a projection (1x1 conv) shortcut; used as the first block of each stage.

    :param stride: stride
    :param base_output: base num_output of branch2
    :param bottom: bottom layer
    :return: layers
    """
    branch1, branch1_bn, branch1_scale = \
        conv_bn_scale(bottom, num_output=4 * base_output, kernel_size=1, stride=stride)
    branch2a, branch2a_bn, branch2a_scale, branch2a_relu = \
        conv_bn_scale_relu(bottom, num_output=base_output, kernel_size=1, stride=stride)
    branch2b, branch2b_bn, branch2b_scale, branch2b_relu = \
        conv_bn_scale_relu(branch2a, num_output=base_output, kernel_size=3, pad=1)
    branch2c, branch2c_bn, branch2c_scale = \
        conv_bn_scale(branch2b, num_output=4 * base_output, kernel_size=1)
    residual, residual_relu = \
        eltwise_relu(branch1, branch2c)  # 4*base_output x n x n
    return branch1, branch1_bn, branch1_scale, branch2a, branch2a_bn, branch2a_scale, branch2a_relu, branch2b, \
        branch2b_bn, branch2b_scale, branch2b_relu, branch2c, branch2c_bn, branch2c_scale, residual, residual_relu
# exec templates: the placeholders (stage), (order), (bottom), (num), (stride)
# are substituted at build time to generate uniquely named layers per block.
branch_shortcut_string = 'n.res(stage)a_branch1, n.res(stage)a_branch1_bn, n.res(stage)a_branch1_scale, \
n.res(stage)a_branch2a, n.res(stage)a_branch2a_bn, n.res(stage)a_branch2a_scale, n.res(stage)a_branch2a_relu, \
n.res(stage)a_branch2b, n.res(stage)a_branch2b_bn, n.res(stage)a_branch2b_scale, n.res(stage)a_branch2b_relu, \
n.res(stage)a_branch2c, n.res(stage)a_branch2c_bn, n.res(stage)a_branch2c_scale, n.res(stage)a, n.res(stage)a_relu = \
residual_branch_shortcut((bottom), stride=(stride), base_output=(num))'

branch_string = 'n.res(stage)b(order)_branch2a, n.res(stage)b(order)_branch2a_bn, n.res(stage)b(order)_branch2a_scale, \
n.res(stage)b(order)_branch2a_relu, n.res(stage)b(order)_branch2b, n.res(stage)b(order)_branch2b_bn, \
n.res(stage)b(order)_branch2b_scale, n.res(stage)b(order)_branch2b_relu, n.res(stage)b(order)_branch2c, \
n.res(stage)b(order)_branch2c_bn, n.res(stage)b(order)_branch2c_scale, n.res(stage)b(order), n.res(stage)b(order)_relu = \
residual_branch((bottom), base_output=(num))'
class ResNet(object):
    def __init__(self, lmdb_train, lmdb_test, num_output):
        self.train_data = lmdb_train
        self.test_data = lmdb_test
        self.classifier_num = num_output

    def resnet_layers_proto(self, batch_size, phase='TRAIN', stages=(3, 4, 6, 3)):
        """
        :param batch_size: the batch_size of the train and test phase
        :param phase: TRAIN or TEST
        :param stages: the number of layers = 2 + 3*sum(stages); layers are best chosen from [50, 101, 152]
                       {every stage is composed of 1 residual_branch_shortcut module and stages[i]-1 residual_branch
                        modules, each module consisting of 3 conv layers}
                       (3, 4, 6, 3) for 50 layers; (3, 4, 23, 3) for 101 layers; (3, 8, 36, 3) for 152 layers
        """
        n = caffe.NetSpec()
        if phase == 'TRAIN':
            source_data = self.train_data
            mirror = True
        else:
            source_data = self.test_data
            mirror = False
        n.data, n.label = L.Data(source=source_data, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                                 transform_param=dict(crop_size=224, mean_value=[104, 117, 123], mirror=mirror))

        n.conv1, n.conv1_bn, n.conv1_scale, n.conv1_relu = \
            conv_bn_scale_relu(n.data, num_output=64, kernel_size=7, stride=2, pad=3)  # 64x112x112
        n.pool1 = L.Pooling(n.conv1, kernel_size=3, stride=2, pool=P.Pooling.MAX)  # 64x56x56

        for num in range(len(stages)):  # num = 0, 1, 2, 3
            for i in range(stages[num]):
                if i == 0:
                    stage_string = branch_shortcut_string
                    bottom_string = ['n.pool1', 'n.res2b%s' % str(stages[0] - 1), 'n.res3b%s' % str(stages[1] - 1),
                                     'n.res4b%s' % str(stages[2] - 1)][num]
                else:
                    stage_string = branch_string
                    if i == 1:
                        bottom_string = 'n.res%sa' % str(num + 2)
                    else:
                        bottom_string = 'n.res%sb%s' % (str(num + 2), str(i - 1))
                exec(stage_string.replace('(stage)', str(num + 2)).replace('(bottom)', bottom_string).
                     replace('(num)', str(2 ** num * 64)).replace('(order)', str(i)).
                     replace('(stride)', str(int(num > 0) + 1)))

        exec('n.pool5 = L.Pooling((bottom), pool=P.Pooling.AVE, global_pooling=True)'.
             replace('(bottom)', 'n.res5b%s' % str(stages[3] - 1)))
        n.classifier = L.InnerProduct(n.pool5, num_output=self.classifier_num,
                                      param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
                                      weight_filler=dict(type='xavier'),
                                      bias_filler=dict(type='constant', value=0))
        n.loss = L.SoftmaxWithLoss(n.classifier, n.label)
        if phase == 'TRAIN':
            pass
        else:
            n.accuracy_top1 = L.Accuracy(n.classifier, n.label, include=dict(phase=1))
            n.accuracy_top5 = L.Accuracy(n.classifier, n.label, include=dict(phase=1),
                                         accuracy_param=dict(top_k=5))
        return n.to_proto()
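To actually emit the 2,300-odd-line prototxt from this generator, a minimal usage sketch looks like the following (the LMDB paths and batch size here are hypothetical placeholders):

# Minimal sketch: generate the train prototxt; 'train_lmdb'/'test_lmdb' are placeholder paths.
resnet = ResNet('train_lmdb', 'test_lmdb', num_output=1000)
with open('resnet50_train.prototxt', 'w') as f:
    f.write(str(resnet.resnet_layers_proto(batch_size=32, phase='TRAIN')))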
Next, the PyTorch version. It is already much shorter, since the common operations are wrapped into reusable modules; the trade-off is that it is less beginner-friendly, because the overall structure is not obvious at a glance.
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152']

model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        out = self.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        out = self.relu(out)
        return out
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False):
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
    return model


def resnet34(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
    return model


def resnet50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
    return model


def resnet101(pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
    return model


def resnet152(pretrained=False, **kwargs):
    """Constructs a ResNet-152 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
    return model
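As a quick sanity check, here is a minimal sketch of a ResNet-50 forward pass on a dummy input (pretrained=False, so no weight download is needed):

import torch

model = resnet50(pretrained=False, num_classes=1000)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))  # one dummy ImageNet-sized image
print(logits.shape)  # expected: torch.Size([1, 1000])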
Finally, the PaddlePaddle version:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.fluid as fluid
import math

__all__ = ["ResNet", "ResNet50", "ResNet101", "ResNet152"]

train_parameters = {
    "input_size": [3, 224, 224],
    "input_mean": [0.485, 0.456, 0.406],
    "input_std": [0.229, 0.224, 0.225],
    "learning_strategy": {
        "name": "piecewise_decay",
        "batch_size": 256,
        "epochs": [30, 60, 90],
        "steps": [0.1, 0.01, 0.001, 0.0001]
    }
}
class ResNet():
    def __init__(self, layers=50):
        self.params = train_parameters
        self.layers = layers

    def net(self, input, class_dim=1000):
        layers = self.layers
        supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, layers)

        if layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        num_filters = [64, 128, 256, 512]

        conv = self.conv_bn_layer(
            input=input, num_filters=64, filter_size=7, stride=2, act='relu')
        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        for block in range(len(depth)):
            for i in range(depth[block]):
                conv = self.bottleneck_block(
                    input=conv,
                    num_filters=num_filters[block],
                    stride=2 if i == 0 and block != 0 else 1)

        pool = fluid.layers.pool2d(
            input=conv, pool_size=7, pool_type='avg', global_pooling=True)
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        out = fluid.layers.fc(input=pool,
                              size=class_dim,
                              act='softmax',
                              param_attr=fluid.param_attr.ParamAttr(
                                  initializer=fluid.initializer.Uniform(-stdv, stdv)))
        return out
    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None):
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            bias_attr=False)
        return fluid.layers.batch_norm(input=conv, act=act)

    def shortcut(self, input, ch_out, stride):
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1:
            # project with a 1x1 conv when the channel count or resolution changes
            return self.conv_bn_layer(input, ch_out, 1, stride)
        else:
            return input

    def bottleneck_block(self, input, num_filters, stride):
        conv0 = self.conv_bn_layer(
            input=input, num_filters=num_filters, filter_size=1, act='relu')
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu')
        conv2 = self.conv_bn_layer(
            input=conv1, num_filters=num_filters * 4, filter_size=1, act=None)
        short = self.shortcut(input, num_filters * 4, stride)
        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
def ResNet50():
    model = ResNet(layers=50)
    return model


def ResNet101():
    model = ResNet(layers=101)
    return model


def ResNet152():
    model = ResNet(layers=152)
    return model
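A minimal sketch of wiring this into a fluid static-graph program (fluid 1.x API; the variable names are illustrative):

# Build the ResNet-50 graph; 'image' is a placeholder input variable.
image = fluid.layers.data(name='image', shape=[3, 224, 224], dtype='float32')
model = ResNet50()
out = model.net(image, class_dim=1000)  # softmax probabilities, shape [N, 1000]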
Of course, the main shortcoming of all the code above is that it comes with neither a clear explanation of the architecture nor a worked training example; fortunately, PaddlePaddle-ResNet-ImageClassification provides a complete end-to-end recipe.
Finally, a simplified diagram of the architecture is attached.