Gesture recognition is very similar to digit recognition: there are 10 gesture classes, one for each of the digits 0-9. We assume that list files mapping each image path to its label have already been generated, named train_data.list and test_data.list, with one image path and its label per line. An example line looks like this:
photos/P0000001.jpg 0
A gesture photo carries a lot of information, but even after reducing the resolution to 100x100 the gesture is still clearly recognizable, so to cut computation and storage we resize each image to (3, 100, 100).
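If the two list files do not exist yet, they can be produced with a small script. The sketch below is only illustrative: it assumes a hypothetical layout in which the images of each gesture are stored under photos/<label>/, writes tab-separated lines (matching the reader below, which splits on '\t'), and sends every tenth image to the test list. Adjust the paths and the label rule to your own dataset.

# Hypothetical helper: build train_data.list / test_data.list from a photos/<label>/xxx.jpg layout
import os
def build_lists(root='photos', train_list='train_data.list', test_list='test_data.list'):
    with open(train_list, 'w') as ftrain, open(test_list, 'w') as ftest:
        for label in sorted(os.listdir(root)):
            label_dir = os.path.join(root, label)
            if not os.path.isdir(label_dir):
                continue
            for i, name in enumerate(sorted(os.listdir(label_dir))):
                line = '{}\t{}\n'.format(os.path.join(label_dir, name), label)
                # every 10th image goes to the test list, the rest to the training list
                (ftest if i % 10 == 0 else ftrain).write(line)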
# Define the readers for the training and test sets
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from PIL import Image
from multiprocessing import cpu_count

def data_mapper(sample):
    img, label = sample
    img = Image.open(img)
    img = img.resize((100, 100), Image.ANTIALIAS)
    img = np.array(img).astype('float32')
    # HWC -> CHW
    img = img.transpose((2, 0, 1))
    # scale pixel values to [-1, 1]
    img = img / 255.0
    img = img * 2 - 1
    return img, label

def data_reader(data_list_path):
    def reader():
        with open(data_list_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                img, label = line.strip().split('\t')
                yield img, int(label)
    # decode and preprocess images in parallel worker threads
    return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 512)
# Data provider for training
train_reader = paddle.batch(reader=paddle.reader.shuffle(reader=data_reader('./train_data.list'), buf_size=256), batch_size=32)
# Data provider for testing
test_reader = paddle.batch(reader=data_reader('./test_data.list'), batch_size=32)
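As a quick sanity check (assuming the two list files exist), one batch can be pulled from the reader and its shape inspected:

# Pull a single batch; each item in it is an (image, label) pair
batch = next(train_reader())
images = np.array([item[0] for item in batch], dtype='float32')
print(len(batch), images.shape)   # expected: 32 (32, 3, 100, 100)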
# Define the AlexNet network structure
class AlexNet(fluid.dygraph.Layer):
    def __init__(self, num_classes=10, is_train=True):
        super(AlexNet, self).__init__()
        name_scope = self.full_name()
        # Like LeNet, AlexNet extracts image features with alternating convolution and pooling layers
        # Unlike LeNet, the activation function is 'relu'
        self.conv1 = Conv2D(num_channels=3, num_filters=32, filter_size=3, stride=1, padding=5, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv2 = Conv2D(num_channels=32, num_filters=64, filter_size=3, stride=1, padding=2, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv3 = Conv2D(num_channels=64, num_filters=48, filter_size=2, stride=1, padding=1, act='relu')
        self.conv4 = Conv2D(num_channels=48, num_filters=48, filter_size=2, stride=1, padding=1, act='relu')
        self.conv5 = Conv2D(num_channels=48, num_filters=32, filter_size=2, stride=1, padding=1, act='relu')
        self.pool5 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.fc1 = Linear(input_dim=32*15*15, output_dim=4096, act='relu')
        # dropout is only active during training
        if is_train:
            self.drop_ratio1 = 0.5
            self.drop_ratio2 = 0.5
        else:
            self.drop_ratio1 = 0.0
            self.drop_ratio2 = 0.0
        self.fc2 = Linear(input_dim=4096, output_dim=4096, act='relu')
        self.fc3 = Linear(input_dim=4096, output_dim=num_classes)

    def forward(self, x):
        x = self.conv1(x)   # [N, 32, 108, 108]
        x = self.pool1(x)   # [N, 32, 54, 54]
        x = self.conv2(x)   # [N, 64, 56, 56]
        x = self.pool2(x)   # [N, 64, 28, 28]
        x = self.conv3(x)   # [N, 48, 29, 29]
        x = self.conv4(x)   # [N, 48, 30, 30]
        x = self.conv5(x)   # [N, 32, 31, 31]
        x = self.pool5(x)   # [N, 32, 15, 15]
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = self.fc1(x)
        # dropout after the fully connected layer to reduce overfitting
        x = fluid.layers.dropout(x, self.drop_ratio1)
        x = self.fc2(x)
        # dropout after the fully connected layer to reduce overfitting
        x = fluid.layers.dropout(x, self.drop_ratio2)
        x = self.fc3(x)
        return x
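A quick forward pass on random data (a sketch with untrained weights) confirms that the flattened feature size matches fc1's input_dim of 32*15*15:

# Feed a random batch of shape [4, 3, 100, 100] through the untrained network
with fluid.dygraph.guard():
    net = AlexNet(10, True)
    fake = np.random.randn(4, 3, 100, 100).astype('float32')
    out = net(fluid.dygraph.to_variable(fake))
    print(out.shape)   # expected: [4, 10]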
# Train with dygraph (imperative) mode
with fluid.dygraph.guard():
    model = AlexNet(10, True)      # instantiate the model
    model.train()                  # switch to training mode
    boundaries = [20000, 30000]    # step boundaries for learning-rate decay
    values = [0.01, 0.001, 0.0001] # learning-rate values for each stage
    # SGD optimizer with piecewise learning-rate decay (0.01 -> 0.001 -> 0.0001)
    opt = fluid.optimizer.SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(boundaries=boundaries, values=values), parameter_list=model.parameters())
    epochs_num = 300               # number of epochs
    for pass_num in range(epochs_num):
        for batch_id, data in enumerate(train_reader()):
            images = np.array([x[0].reshape(3, 100, 100) for x in data], np.float32)
            labels = np.array([x[1] for x in data]).astype('int64')
            labels = labels[:, np.newaxis]
            image = fluid.dygraph.to_variable(images)
            label = fluid.dygraph.to_variable(labels)
            predict = model(image)  # forward pass
            loss = fluid.layers.softmax_with_cross_entropy(predict, label)
            avg_loss = fluid.layers.mean(loss)           # mean loss over the batch
            acc = fluid.layers.accuracy(predict, label)  # batch accuracy
            if batch_id != 0 and batch_id % 25 == 0:
                print("train_pass:{},batch_id:{},train_loss:{},train_acc:{}".format(pass_num, batch_id, avg_loss.numpy(), acc.numpy()))
            avg_loss.backward()
            opt.minimize(avg_loss)
            model.clear_gradients()
        # save a checkpoint every 50 epochs
        if pass_num % 50 == 0:
            fluid.save_dygraph(model.state_dict(), 'AlexNet' + str(pass_num))
    fluid.save_dygraph(model.state_dict(), 'AlexNet')   # save the final model
# Model evaluation
with fluid.dygraph.guard():
    accs = []
    model_dict, _ = fluid.load_dygraph('AlexNet200')
    model = AlexNet(10, False)
    model.load_dict(stat_dict=model_dict)  # load the trained parameters
    model.eval()                           # switch to evaluation mode
    for batch_id, data in enumerate(test_reader()):  # iterate over the test set
        images = np.array([x[0].reshape(3, 100, 100) for x in data], np.float32)
        labels = np.array([x[1] for x in data]).astype('int64')
        labels = labels[:, np.newaxis]
        image = fluid.dygraph.to_variable(images)
        label = fluid.dygraph.to_variable(labels)
        predict = model(image)
        acc = fluid.layers.accuracy(predict, label)
        accs.append(acc.numpy()[0])
    avg_acc = np.mean(accs)
    print(avg_acc)
With the training above, the accuracy on the test set reaches 0.9776786.
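For completeness, here is a minimal sketch of running inference on a single image with the saved parameters. It reuses data_mapper from above, and the file name some_gesture.jpg is only a placeholder for a real image path.

# Predict the gesture in one image ('some_gesture.jpg' is a placeholder path)
with fluid.dygraph.guard():
    params, _ = fluid.load_dygraph('AlexNet200')
    model = AlexNet(10, False)
    model.load_dict(stat_dict=params)
    model.eval()
    img, _ = data_mapper(('some_gesture.jpg', 0))          # preprocess exactly like the training data
    img = fluid.dygraph.to_variable(img[np.newaxis, :])    # add the batch dimension
    logits = model(img)
    print(np.argmax(logits.numpy()))                       # predicted digit 0-9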
This part has already been written up on my blog at https://blog.csdn.net/weixin_43938942/article/details/105271436, so it is not repeated here.
Here I have re-implemented LeNet, AlexNet, VGG, GoogLeNet and ResNet with Paddle 1.7.1. To make it easy to run directly in AI Studio, the project is available at https://aistudio.baidu.com/aistudio/projectdetail/372503; the networks were mainly tested on an eye-disease dataset.
The code below is a copy of the network-structure code from that project.
LeNet was not tested on the eye-disease data: those images carry too much information for LeNet to reach a good accuracy, and the LeNet defined here expects an input of size 1x32x32.
# Define the LeNet network structure, input dim = 1x32x32
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable

class LeNet(fluid.dygraph.Layer):
    def __init__(self, num_classes=2):
        super(LeNet, self).__init__()
        name_scope = self.full_name()
        self.name = 'LeNet'
        # Convolution + pooling blocks: each convolution uses a Sigmoid activation and is followed by 2x2 max pooling
        self.conv1 = Conv2D(num_channels=1, num_filters=6, filter_size=2, act='sigmoid')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv2 = Conv2D(num_channels=6, num_filters=16, filter_size=2, act='sigmoid')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        # Third convolution layer: its input is a 16-channel 7x7 feature map,
        # so a 7x7 kernel reduces it to 1x1, matching fc1's input_dim of 120
        self.conv3 = Conv2D(num_channels=16, num_filters=120, filter_size=7, act='sigmoid')
        # Fully connected layers: the first outputs 64 neurons, the second outputs the number of classes
        self.fc1 = Linear(input_dim=120, output_dim=64, act='sigmoid')
        self.drop_ratio1 = 0.5
        self.fc2 = Linear(input_dim=64, output_dim=num_classes)

    # Forward computation of the network
    def forward(self, x):
        x = self.conv1(x)  # [N, 6, 31, 31]
        x = self.pool1(x)  # [N, 6, 15, 15]
        x = self.conv2(x)  # [N, 16, 14, 14]
        x = self.pool2(x)  # [N, 16, 7, 7]
        x = self.conv3(x)  # [N, 120, 1, 1]
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = self.fc1(x)
        x = fluid.layers.dropout(x, self.drop_ratio1)
        x = self.fc2(x)
        return x
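A short sketch that runs random 1x32x32 input through the network to verify the shapes annotated above:

# Sanity-check LeNet on random grayscale input of size 32x32
with fluid.dygraph.guard():
    net = LeNet(num_classes=2)
    fake = np.random.randn(4, 1, 32, 32).astype('float32')
    out = net(fluid.dygraph.to_variable(fake))
    print(out.shape)   # expected: [4, 2]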
AlexNet network structure. Some of the hyperparameters in this version are not particularly well chosen; feel free to adjust them to your needs.
# Define the AlexNet network structure
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable

class AlexNet(fluid.dygraph.Layer):
    def __init__(self, num_classes=2):
        super(AlexNet, self).__init__()
        name_scope = self.full_name()
        self.name = 'AlexNet'
        # Like LeNet, AlexNet extracts image features with alternating convolution and pooling layers
        # Unlike LeNet, the activation function is 'relu'
        self.conv1 = Conv2D(num_channels=3, num_filters=96, filter_size=11, stride=4, padding=5, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv2 = Conv2D(num_channels=96, num_filters=256, filter_size=5, stride=1, padding=2, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv3 = Conv2D(num_channels=256, num_filters=384, filter_size=3, stride=1, padding=1, act='relu')
        self.conv4 = Conv2D(num_channels=384, num_filters=384, filter_size=3, stride=1, padding=1, act='relu')
        self.conv5 = Conv2D(num_channels=384, num_filters=256, filter_size=3, stride=1, padding=1, act='relu')
        self.pool5 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.fc1 = Linear(input_dim=256*7*7, output_dim=4096, act='relu')
        self.drop_ratio1 = 0.5
        self.fc2 = Linear(input_dim=4096, output_dim=4096, act='relu')
        self.drop_ratio2 = 0.5
        self.fc3 = Linear(input_dim=4096, output_dim=num_classes)

    def forward(self, x):
        # shape comments assume a 3x224x224 input
        x = self.conv1(x)  # [N, 96, 56, 56]
        x = self.pool1(x)  # [N, 96, 28, 28]
        x = self.conv2(x)  # [N, 256, 28, 28]
        x = self.pool2(x)  # [N, 256, 14, 14]
        x = self.conv3(x)  # [N, 384, 14, 14]
        x = self.conv4(x)  # [N, 384, 14, 14]
        x = self.conv5(x)  # [N, 256, 14, 14]
        x = self.pool5(x)  # [N, 256, 7, 7]
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = self.fc1(x)
        # dropout after the fully connected layer to reduce overfitting
        x = fluid.layers.dropout(x, self.drop_ratio1)
        x = self.fc2(x)
        # dropout after the fully connected layer to reduce overfitting
        x = fluid.layers.dropout(x, self.drop_ratio2)
        x = self.fc3(x)
        return x
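The fully connected layer fc1 expects a 256x7x7 feature map, i.e. a 3x224x224 input, and it is where most of the weights live. A small sketch that counts the parameters of an untrained instance makes this concrete:

# Count parameters; the bulk comes from fc1 (256*7*7*4096, roughly 51M weights)
with fluid.dygraph.guard():
    net = AlexNet(num_classes=2)
    n_params = sum(np.prod(p.shape) for p in net.parameters())
    print(n_params)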
VGG network structure. Note that the class below relies on a ConvPool helper layer that is not part of this excerpt; a sketch of it follows the imports.
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable
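The ConvPool layer used by VGG is not included in this excerpt. The following is a plausible reconstruction, inferred from how it is called below: a stack of `groups` convolutions of size `filter_size` followed by one pooling layer. It is a sketch, not the project's actual definition; in particular, applying the passed `act` to every convolution is an assumption (the original helper may hard-code 'relu').

# Assumed ConvPool helper: 'groups' convolutions followed by one pooling layer
class ConvPool(fluid.dygraph.Layer):
    def __init__(self, num_channels, num_filters, filter_size,
                 pool_size, pool_stride, groups,
                 conv_stride=1, conv_padding=1,
                 pool_padding=0, pool_type='max', act=None):
        super(ConvPool, self).__init__()
        self._conv2d_list = []
        for i in range(groups):
            conv = self.add_sublayer(
                'conv_%d' % i,
                Conv2D(num_channels=num_channels if i == 0 else num_filters,
                       num_filters=num_filters,
                       filter_size=filter_size,
                       stride=conv_stride,
                       padding=conv_padding,
                       act=act))   # assumption: 'act' is applied to every convolution
            self._conv2d_list.append(conv)
        self._pool2d = Pool2D(pool_size=pool_size,
                              pool_stride=pool_stride,
                              pool_padding=pool_padding,
                              pool_type=pool_type)

    def forward(self, inputs):
        x = inputs
        for conv in self._conv2d_list:
            x = conv(x)
        return self._pool2d(x)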
class VGG(fluid.dygraph.Layer):
    def __init__(self, num_classes=2, conv_arch=((2, 64),
                 (2, 128), (3, 256), (3, 512), (3, 512))):
        super(VGG, self).__init__()
        self.name = 'VGG'
        self.vgg_blocks = []
        iter_id = 0
        # Add the vgg_blocks:
        # there are 5 of them; the number of convolutions and the output channels of each block are given by conv_arch
        channels = 3
        for (num_convs, num_channels) in conv_arch:
            block = self.add_sublayer('block_' + str(iter_id),
                                      ConvPool(num_channels=channels,
                                               num_filters=num_channels,
                                               filter_size=3,
                                               pool_size=2,
                                               pool_stride=2,
                                               groups=num_convs,
                                               pool_padding=0,
                                               pool_type='max',
                                               conv_stride=1,
                                               conv_padding=1,
                                               act=None))
            channels = num_channels
            self.vgg_blocks.append(block)
            iter_id += 1
        self.fc1 = Linear(input_dim=512*7*7, output_dim=4096, act='relu')
        self.drop1_ratio = 0.5
        self.fc2 = Linear(input_dim=4096, output_dim=4096, act='relu')
        self.drop2_ratio = 0.5
        self.fc3 = Linear(input_dim=4096, output_dim=num_classes)

    def forward(self, x):
        for item in self.vgg_blocks:
            x = item(x)
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = fluid.layers.dropout(self.fc1(x), self.drop1_ratio)
        x = fluid.layers.dropout(self.fc2(x), self.drop2_ratio)
        x = self.fc3(x)
        return x
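With a 3x224x224 input, the five blocks halve the spatial size five times (224 -> 7), which is exactly what fc1's input_dim of 512*7*7 assumes. A quick check (using the ConvPool sketch above):

# Forward an untrained VGG on random 3x224x224 input
with fluid.dygraph.guard():
    net = VGG(num_classes=2)
    fake = np.random.randn(1, 3, 224, 224).astype('float32')
    print(net(fluid.dygraph.to_variable(fake)).shape)   # expected: [1, 2]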
GoogLeNet network structure
# Build the GoogLeNet network
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable

# GoogLeNet
# Define the Inception block
class Inception(fluid.dygraph.Layer):
    def __init__(self, num_channels, c1, c2, c3, c4):
        '''
        Inception block.
        c1: output channels of the 1x1 convolution on the first branch, an integer
        c2: output channels of the convolutions on the second branch, a tuple or list,
            where c2[0] is for the 1x1 convolution and c2[1] for the 3x3 convolution
        c3: output channels of the convolutions on the third branch, a tuple or list,
            where c3[0] is for the 1x1 convolution and c3[1] for the 5x5 convolution
        c4: output channels of the 1x1 convolution on the fourth branch, an integer
        '''
        super(Inception, self).__init__()
        # Create the operators used on each branch of the Inception block
        self.p1_1 = Conv2D(num_channels=num_channels, num_filters=c1,
                           filter_size=1, act='relu')
        self.p2_1 = Conv2D(num_channels=num_channels, num_filters=c2[0],
                           filter_size=1, act='relu')
        self.p2_2 = Conv2D(num_channels=c2[0], num_filters=c2[1],
                           filter_size=3, padding=1, act='relu')
        self.p3_1 = Conv2D(num_channels=num_channels, num_filters=c3[0],
                           filter_size=1, act='relu')
        self.p3_2 = Conv2D(num_channels=c3[0], num_filters=c3[1],
                           filter_size=5, padding=2, act='relu')
        self.p4_1 = Pool2D(pool_size=3, pool_stride=1, pool_padding=1, pool_type='max')
        self.p4_2 = Conv2D(num_channels=num_channels, num_filters=c4, filter_size=1, act='relu')

    def forward(self, x):
        # Branch 1: a single 1x1 convolution, output c1 x w x h
        p1 = self.p1_1(x)
        # Branch 2: 1x1 convolution + 3x3 convolution, output c2[1] x w x h
        p2 = self.p2_2(self.p2_1(x))
        # Branch 3: 1x1 convolution + 5x5 convolution, output c3[1] x w x h
        p3 = self.p3_2(self.p3_1(x))
        # Branch 4: max pooling + 1x1 convolution, output c4 x w x h
        p4 = self.p4_2(self.p4_1(x))
        # Concatenate the branch outputs along the channel axis: (c1+c2[1]+c3[1]+c4) x w x h
        return fluid.layers.concat([p1, p2, p3, p4], axis=1)
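As a small worked example, the first Inception block used below, Inception(192, 64, (96, 128), (16, 32), 32), produces 64 + 128 + 32 + 32 = 256 output channels, which is exactly the num_channels expected by block3_2. A sketch to verify this on random input:

# Verify the output channel count of one Inception block
with fluid.dygraph.guard():
    block = Inception(192, 64, (96, 128), (16, 32), 32)
    fake = np.random.randn(1, 192, 28, 28).astype('float32')
    print(block(fluid.dygraph.to_variable(fake)).shape)   # expected: [1, 256, 28, 28]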
class GoogLeNet(fluid.dygraph.Layer):
    def __init__(self, num_classes=2):
        super(GoogLeNet, self).__init__()
        # GoogLeNet consists of five stages, each followed by a pooling layer
        # The first stage contains a single convolution layer
        self.name = 'GoogLeNet'
        self.conv1 = Conv2D(num_channels=3, num_filters=64, filter_size=7,
                            padding=3, act='relu')
        # 3x3 max pooling
        self.pool1 = Pool2D(pool_size=3, pool_stride=2,
                            pool_padding=1, pool_type='max')
        # The second stage contains two convolution layers
        self.conv2_1 = Conv2D(num_channels=64, num_filters=64,
                              filter_size=1, act='relu')
        self.conv2_2 = Conv2D(num_channels=64, num_filters=192,
                              filter_size=3, padding=1, act='relu')
        # 3x3 max pooling
        self.pool2 = Pool2D(pool_size=3, pool_stride=2,
                            pool_padding=1, pool_type='max')
        # The third stage contains two Inception blocks
        self.block3_1 = Inception(num_channels=192, c1=64, c2=(96, 128), c3=(16, 32), c4=32)
        self.block3_2 = Inception(num_channels=256, c1=128, c2=(128, 192), c3=(32, 96), c4=64)
        # 3x3 max pooling
        self.pool3 = Pool2D(pool_size=3, pool_stride=2,
                            pool_padding=1, pool_type='max')
        # The fourth stage contains five Inception blocks
        self.block4_1 = Inception(num_channels=480, c1=192, c2=(96, 208), c3=(16, 48), c4=64)
        self.block4_2 = Inception(num_channels=512, c1=160, c2=(112, 224), c3=(24, 64), c4=64)
        self.block4_3 = Inception(num_channels=512, c1=128, c2=(128, 256), c3=(24, 64), c4=64)
        self.block4_4 = Inception(num_channels=512, c1=112, c2=(144, 288), c3=(32, 64), c4=64)
        self.block4_5 = Inception(num_channels=528, c1=256, c2=(160, 320), c3=(32, 128), c4=128)
        # 3x3 max pooling
        self.pool4 = Pool2D(pool_size=3, pool_stride=2,
                            pool_padding=1, pool_type='max')
        # The fifth stage contains two Inception blocks
        self.block5_1 = Inception(num_channels=832, c1=256, c2=(160, 320), c3=(32, 128), c4=128)
        self.block5_2 = Inception(num_channels=832, c1=384, c2=(192, 384), c3=(48, 128), c4=128)
        # Global average pooling; with global_pooling=True the pool size and stride settings are ignored
        self.pool5 = Pool2D(pool_stride=1,
                            global_pooling=True, pool_type='avg')
        self.fc = Linear(input_dim=1024, output_dim=num_classes)

    def forward(self, x):
        x = self.pool1(self.conv1(x))
        x = self.pool2(self.conv2_2(self.conv2_1(x)))
        x = self.pool3(self.block3_2(self.block3_1(x)))
        x = self.block4_3(self.block4_2(self.block4_1(x)))
        x = self.pool4(self.block4_5(self.block4_4(x)))
        x = self.pool5(self.block5_2(self.block5_1(x)))
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = self.fc(x)
        return x
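Because the final global average pooling removes the spatial dependence, the network accepts a range of input sizes; the last Inception block always outputs 384 + 384 + 128 + 128 = 1024 channels, matching fc's input_dim. A quick sketch:

# Forward an untrained GoogLeNet on random input of two different sizes
with fluid.dygraph.guard():
    net = GoogLeNet(num_classes=2)
    for size in (224, 96):
        fake = np.random.randn(1, 3, size, size).astype('float32')
        print(size, net(fluid.dygraph.to_variable(fake)).shape)   # expected: [1, 2] both times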
ResNet network structure
# ResNet
# ResNet uses BatchNorm layers: a BatchNorm after each convolution improves numerical stability
# Define the convolution + batch-normalization block
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable

class ConvBNLayer(fluid.dygraph.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 act=None):
        """
        num_channels: number of input channels of the convolution
        num_filters: number of output channels of the convolution
        stride: stride of the convolution
        groups: number of groups for grouped convolution; groups=1 (default) disables grouping
        act: activation type; act=None (default) means no activation
        """
        super(ConvBNLayer, self).__init__()
        # Create the convolution layer
        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            bias_attr=False)
        # Create the BatchNorm layer; the activation is applied after BatchNorm
        self._batch_norm = BatchNorm(num_filters, act=act)

    def forward(self, inputs):
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y

# Define the residual (bottleneck) block
# Each block applies three convolutions to the input and then adds a shortcut connection
# If the output of the third convolution does not match the input shape, a 1x1 convolution
# is applied to the input to bring the shapes into agreement
class BottleneckBlock(fluid.dygraph.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True):
        super(BottleneckBlock, self).__init__()
        # First convolution, 1x1
        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='relu')
        # Second convolution, 3x3
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu')
        # Third convolution, 1x1, with the output channel count multiplied by 4
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None)
        # If the output of conv2 matches the input shape of this block, shortcut=True;
        # otherwise shortcut=False and a 1x1 convolution brings the input to conv2's output shape
        if not shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=num_filters * 4,
                filter_size=1,
                stride=stride)
        self.shortcut = shortcut
        self._num_channels_out = num_filters * 4

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)
        # If shortcut=True, add inputs directly to conv2's output;
        # otherwise pass inputs through the 1x1 convolution first so that the shapes match
        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = fluid.layers.elementwise_add(x=short, y=conv2)
        # Apply the final ReLU after the addition
        layer_helper = LayerHelper(self.full_name(), act='relu')
        return layer_helper.append_activation(y)
# Define the ResNet model
class ResNet(fluid.dygraph.Layer):
    def __init__(self, layers=50, class_dim=1):
        """
        layers: network depth, one of 50, 101 or 152
        class_dim: number of output classes
        """
        super(ResNet, self).__init__()
        self.name = 'ResNet'
        self.layers = layers
        supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, layers)
        if layers == 50:
            # ResNet50: stages 2-5 contain 3, 4, 6 and 3 residual blocks respectively
            depth = [3, 4, 6, 3]
        elif layers == 101:
            # ResNet101: stages 2-5 contain 3, 4, 23 and 3 residual blocks respectively
            depth = [3, 4, 23, 3]
        elif layers == 152:
            # ResNet152: stages 2-5 contain 3, 8, 36 and 3 residual blocks respectively
            depth = [3, 8, 36, 3]
        # Output channels of the convolutions used in the residual blocks
        num_filters = [64, 128, 256, 512]
        # The first stage of ResNet: one 7x7 convolution followed by a max pooling layer
        self.conv = ConvBNLayer(
            num_channels=3,
            num_filters=64,
            filter_size=7,
            stride=2,
            act='relu')
        self.pool2d_max = Pool2D(
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')
        # Stages two to five of ResNet: c2, c3, c4, c5
        self.bottleneck_block_list = []
        num_channels = 64
        for block in range(len(depth)):
            shortcut = False
            for i in range(depth[block]):
                bottleneck_block = self.add_sublayer(
                    'bb_%d_%d' % (block, i),
                    BottleneckBlock(
                        num_channels=num_channels,
                        num_filters=num_filters[block],
                        # c3, c4 and c5 use stride=2 in their first residual block; all other blocks use stride=1
                        stride=2 if i == 0 and block != 0 else 1,
                        shortcut=shortcut))
                num_channels = bottleneck_block._num_channels_out
                self.bottleneck_block_list.append(bottleneck_block)
                shortcut = True
        # Global average pooling on the output feature map of c5
        self.pool2d_avg = Pool2D(pool_size=7, pool_type='avg', global_pooling=True)
        # stdv is used as the bound of the uniform random initializer of the fully connected layer
        import math
        stdv = 1.0 / math.sqrt(2048 * 1.0)
        # Fully connected output layer, output size equals the number of classes
        self.out = Linear(input_dim=2048, output_dim=class_dim,
                          param_attr=fluid.param_attr.ParamAttr(
                              initializer=fluid.initializer.Uniform(-stdv, stdv)))

    def forward(self, inputs):
        y = self.conv(inputs)
        y = self.pool2d_max(y)
        for bottleneck_block in self.bottleneck_block_list:
            y = bottleneck_block(y)
        y = self.pool2d_avg(y)
        y = fluid.layers.reshape(y, [y.shape[0], -1])
        y = self.out(y)
        return y
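A final sketch that instantiates ResNet-50 and checks the output shape on a random 3x224x224 batch; class_dim=1 here matches the single-logit setup used for the binary eye-disease task:

# Forward an untrained ResNet-50 on random 3x224x224 input
with fluid.dygraph.guard():
    net = ResNet(layers=50, class_dim=1)
    fake = np.random.randn(2, 3, 224, 224).astype('float32')
    print(net(fluid.dygraph.to_variable(fake)).shape)   # expected: [2, 1]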
Finally, a home-grown deep learning framework has to compete with PyTorch, TensorFlow and the other established platforms, but given the quality of Paddle's Chinese API documentation, I believe it will gain real traction in China.