视频分类任务、问题与挑战、经典数据集、深度学习相关背景知识
主要用于行为识别
主要用于行为识别,运动分析,是最具有影响力的视频分类数据集之一
主要用于行为识别,运动分析,斯坦福大学提供的视频分类数据集,其运动信息丰富
主要用于行为识别,但由于视频特征并不完善,限制了算法设计发挥的空间
主要用于行为识别,由DeepMind团队提出,是最具有影响力的视频分类数据集之一
双流网络、静态图像特征聚合、3D卷积等经典视频分类方法
高效视频网络、运动增强的RGB分类、快慢信息结合网络、光流表示学习、时序金字塔网络
TSN.py代码补全
import paddle.fluid as fluid
import numpy as np
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
class ConvBNLayer(fluid.dygraph.Layer):
    """Bias-free 2-D convolution followed by batch normalization.

    The convolution itself is always created without an activation; the
    optional ``act`` is handed to the batch-norm layer instead.

    Args:
        name_scope: Name scope forwarded to ``fluid.dygraph.Layer``.
        num_channels: Number of input channels of the convolution.
        num_filters: Number of convolution filters (output channels).
        filter_size: Integer kernel size; also determines the padding.
        stride: Convolution stride.
        groups: Number of convolution groups.
        act: Activation name passed to ``BatchNorm`` (``None`` for none).
    """

    def __init__(self,
                 name_scope,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 act=None):
        super(ConvBNLayer, self).__init__(name_scope)

        # Half-kernel padding derived from the kernel size.
        half_kernel_padding = (filter_size - 1) // 2

        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=half_kernel_padding,
            groups=groups,
            act=None,
            bias_attr=False)
        self._batch_norm = BatchNorm(num_filters, act=act)

    def forward(self, inputs):
        """Apply the convolution, then batch normalization, to ``inputs``."""
        return self._batch_norm(self._conv(inputs))
class BottleneckBlock(fluid.dygraph.Layer):
    """Residual bottleneck built from three ``ConvBNLayer`` stages.

    The main branch is a 1x1 -> 3x3 -> 1x1 stack whose last stage outputs
    ``num_filters * 4`` channels. The result is added to a shortcut branch
    and passed through ReLU.

    Args:
        name_scope: Name scope forwarded to ``fluid.dygraph.Layer``.
        num_channels: Number of channels of the block input.
        num_filters: Width of the first two stages; the block outputs
            ``num_filters * 4`` channels.
        stride: Stride of the 3x3 stage (and of the projection shortcut).
        shortcut: When true the input is added unchanged; when false a 1x1
            ``ConvBNLayer`` projection is created and applied to the input.
    """

    def __init__(self,
                 name_scope,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True):
        super(BottleneckBlock, self).__init__(name_scope)

        expanded_filters = num_filters * 4

        self.conv0 = ConvBNLayer(
            self.full_name(),
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='relu')
        self.conv1 = ConvBNLayer(
            self.full_name(),
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu')
        self.conv2 = ConvBNLayer(
            self.full_name(),
            num_channels=num_filters,
            num_filters=expanded_filters,
            filter_size=1,
            act=None)

        # A projection is only created when the identity shortcut is disabled.
        if not shortcut:
            self.short = ConvBNLayer(
                self.full_name(),
                num_channels=num_channels,
                num_filters=expanded_filters,
                filter_size=1,
                stride=stride)

        self.shortcut = shortcut
        # Read by the enclosing network to wire up the next block.
        self._num_channels_out = expanded_filters

    def forward(self, inputs):
        """Return ``relu(shortcut(inputs) + main_branch(inputs))``."""
        main_branch = self.conv2(self.conv1(self.conv0(inputs)))
        skip_branch = inputs if self.shortcut else self.short(inputs)
        merged = fluid.layers.elementwise_add(x=skip_branch, y=main_branch)
        relu_helper = LayerHelper(self.full_name(), act='relu')
        return relu_helper.append_activation(merged)
class TSNResNet(fluid.dygraph.Layer):
    """TSN-style video classifier on a ResNet-50/101/152 backbone.

    Every frame is pushed through the 2-D ResNet; the pooled per-frame
    features are then grouped into chunks of ``seg_num``, averaged, and fed
    to a softmax classifier.

    Args:
        name_scope: Name scope forwarded to ``fluid.dygraph.Layer``.
        layers: Backbone depth; must be 50, 101 or 152.
        class_dim: Number of output classes.
        seg_num: Number of segments (frames) averaged per sample.
        weight_devay: Accepted but not used by this class. The (misspelled)
            name is kept so existing keyword callers keep working.
    """

    def __init__(self,
                 name_scope,
                 layers=50,
                 class_dim=102,
                 seg_num=10,
                 weight_devay=None):
        import math

        super(TSNResNet, self).__init__(name_scope)

        self.layers = layers
        self.seg_num = seg_num

        supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, layers)

        # Number of bottleneck blocks in each of the four stages.
        depth_by_layers = {
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
        }
        depth = depth_by_layers[layers]
        num_filters = [64, 128, 256, 512]

        # Stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max pooling.
        self.conv = ConvBNLayer(
            self.full_name(),
            num_channels=3,
            num_filters=64,
            filter_size=7,
            stride=2,
            act='relu')
        self.pool2d_max = Pool2D(
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        self.bottleneck_block_list = []
        num_channels = 64
        for block, block_depth in enumerate(depth):
            for i in range(block_depth):
                # The first block of each stage changes the channel count, so
                # it uses a projection shortcut; later blocks add the input
                # unchanged. Stages after the first downsample in block 0.
                bottleneck_block = self.add_sublayer(
                    'bb_%d_%d' % (block, i),
                    BottleneckBlock(
                        self.full_name(),
                        num_channels=num_channels,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1,
                        shortcut=(i != 0)))
                num_channels = bottleneck_block._num_channels_out
                self.bottleneck_block_list.append(bottleneck_block)

        self.pool2d_avg = Pool2D(pool_size=7, pool_type='avg', global_pooling=True)

        # Derive the uniform-init bound from the real classifier input width
        # instead of a literal 2048 (num_channels is 2048 for every supported
        # depth, so the resulting bound is the same).
        stdv = 1.0 / math.sqrt(num_channels * 1.0)
        self.out = Linear(
            input_dim=num_channels,
            output_dim=class_dim,
            act='softmax',
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv)))

    def forward(self, inputs, laber=None):
        """Classify a batch of segmented clips.

        Args:
            inputs: 5-D variable; the first two dimensions are folded
                together before the backbone runs. Presumably laid out as
                (batch, seg_num, channels, height, width) -- confirm against
                the data reader.
            laber: Optional label variable (parameter name kept as-is for
                backward compatibility). When given, accuracy is computed.

        Returns:
            The softmax output ``y``, or the tuple ``(y, acc)`` when
            ``laber`` is not ``None``.
        """
        # Fold the leading two dimensions so each frame is a separate image.
        frames = fluid.layers.reshape(
            inputs, [-1, inputs.shape[2], inputs.shape[3], inputs.shape[4]])

        y = self.conv(frames)
        y = self.pool2d_max(y)
        for bottleneck_block in self.bottleneck_block_list:
            y = bottleneck_block(y)
        y = self.pool2d_avg(y)

        # Regroup per-frame features by sample and average over the segments.
        per_segment = fluid.layers.reshape(x=y, shape=[-1, self.seg_num, y.shape[1]])
        pooled = fluid.layers.reduce_mean(per_segment, dim=1)
        y = self.out(pooled)

        if laber is None:
            return y
        acc = fluid.layers.accuracy(input=y, label=laber)
        return y, acc
if __name__ == '__main__':
    # Smoke test: run one all-zero clip through a ResNet-50 TSN model in
    # dygraph mode and print the resulting output array.
    with fluid.dygraph.guard():
        model = TSNResNet('resnet', 50)
        zero_clip = np.zeros([1, 10, 3, 224, 224]).astype('float32')
        zero_clip_var = fluid.dygraph.to_variable(zero_clip)
        predictions = model(zero_clip_var).numpy()
        print(predictions)