论文链接:https://arxiv.org/pdf/1907.05047v1.pdf
PyTorch:https://github.com/shanglianlm0525/BlazeFace
BlazeFace 是作者在 MobileNet-SSD 目标检测框架的基础上,通过改进网络结构、改进 anchor 机制并替换 NMS 后处理而得到的轻量级人脸检测网络。它为移动端 GPU 推理量身定制,在人脸检测任务中保持高精度的同时具有极快的推理速度。
在深度可分离卷积(depthwise separable convolution)中,设输入特征图大小为 $s \times s \times c$、卷积核大小为 $k \times k$、输出通道数为 $d$,则 depthwise convolution 部分的计算量为 $s^2ck^2$,pointwise convolution 部分的计算量为 $s^2cd$,二者的比值为 $k^2 : d$。可见 depthwise separable convolution 的计算量主要由 $d$ 决定:使用 5×5 卷积核代替 3×3 卷积核不会带来太大开销,但是可以增大感受野(receptive field)。基于此作者设计了下面两种结构:
SSD 使用 1×1、2×2、4×4、8×8 和 16×16 五种分辨率的特征图来回归目标。为了让 GPU 计算更友好,作者在 8×8 分辨率处停止下采样,并在 8×8 特征图的每个位置上使用 6 个 anchor,来替换原来在 2×2、4×4、8×8 三种分辨率上每个位置各 2 个 anchor 的设置。
由于上文的 anchor 机制只使用较高分辨率(16×16 和 8×8)的特征图,与同一张人脸重叠的 anchor 数量会明显增多;如果沿用传统 NMS、只保留其中得分最高的一个框,那么在视频人脸检测中,相邻帧选中的框会来回跳动,检测出的人脸框会明显抖动。为此作者提出一种 blending 策略来替换原来的 NMS,即把边界框的回归参数估计为各重叠预测之间的加权平均值。
与MobileNetV2-SSD性能比较
速度很快呀
PyTorch实现:
import torch
import torch.nn as nn
class BlazeBlock(nn.Module):
    """Single BlazeBlock: 5x5 grouped conv + 1x1 conv with a residual connection.

    The main branch is ``5x5 conv (groups=in_channels) -> BN -> 1x1 conv -> BN``
    with no activation in between; a ReLU is applied after the residual add.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels of the output tensor.
        mid_channels: Channels produced by the 5x5 grouped conv. Defaults to
            ``in_channels`` (a true depthwise conv). Because the conv uses
            ``groups=in_channels``, this must be a multiple of ``in_channels``.
        stride: 1 keeps the spatial size, 2 halves it (rounding up).

    The identity shortcut is only valid when the block changes neither the
    spatial size nor the channel count. In every other case a projection
    shortcut (``MaxPool -> 1x1 conv -> BN``) is used so that both operands of
    the residual add always have the same shape.
    """

    def __init__(self, in_channels, out_channels, mid_channels=None, stride=1):
        super(BlazeBlock, self).__init__()
        mid_channels = mid_channels or in_channels
        assert stride in [1, 2]

        # True when the block down-samples spatially.
        self.use_pool = stride > 1
        # A plain `x + branch(x)` only works if shapes match, so a projection
        # is also required when just the channel count changes.
        self.use_projection = self.use_pool or in_channels != out_channels

        self.branch1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=mid_channels, kernel_size=5,
                      stride=stride, padding=2, groups=in_channels),
            nn.BatchNorm2d(mid_channels),
            nn.Conv2d(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, stride=1),
            nn.BatchNorm2d(out_channels),
        )

        if self.use_projection:
            self.shortcut = nn.Sequential(
                # ceil_mode=True gives ceil(n / stride) output rows/cols, which
                # is what the padded 5x5 conv above produces; without it odd
                # input sizes make the two branches disagree by one pixel.
                # With stride == 1 this pooling layer is a no-op.
                nn.MaxPool2d(kernel_size=stride, stride=stride, ceil_mode=True),
                nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1),
                nn.BatchNorm2d(out_channels),
            )

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``relu(branch1(x) + shortcut(x))`` (identity shortcut if shapes match)."""
        branch1 = self.branch1(x)
        residual = self.shortcut(x) if self.use_projection else x
        return self.relu(branch1 + residual)
class DoubleBlazeBlock(nn.Module):
    """Double BlazeBlock: two stacked 5x5 + 1x1 stages with a bottleneck.

    Main branch, in order:
    ``5x5 depthwise conv -> BN -> 1x1 conv (in -> mid) -> BN -> ReLU ->
    5x5 conv (mid -> mid) -> BN -> 1x1 conv (mid -> out) -> BN``.
    A ReLU is applied after the residual add.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels of the output tensor.
        mid_channels: Width of the bottleneck between the two stages.
            Defaults to ``in_channels``.
        stride: 1 keeps the spatial size, 2 halves it (rounding up). Only the
            first 5x5 conv is strided.

    The identity shortcut is used only when neither the spatial size nor the
    channel count changes; otherwise a projection shortcut
    (``MaxPool -> 1x1 conv -> BN``) keeps both operands of the residual add
    the same shape.
    """

    def __init__(self, in_channels, out_channels, mid_channels=None, stride=1):
        super(DoubleBlazeBlock, self).__init__()
        mid_channels = mid_channels or in_channels
        assert stride in [1, 2]

        # True when the block down-samples spatially.
        self.use_pool = stride > 1
        # The residual add needs matching shapes, so a channel change alone
        # also requires the projection shortcut.
        self.use_projection = self.use_pool or in_channels != out_channels

        self.branch1 = nn.Sequential(
            # Stage 1: depthwise 5x5 followed by a 1x1 projection to mid_channels.
            nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=5,
                      stride=stride, padding=2, groups=in_channels),
            nn.BatchNorm2d(in_channels),
            nn.Conv2d(in_channels=in_channels, out_channels=mid_channels, kernel_size=1, stride=1),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            # Stage 2: 5x5 followed by a 1x1 expansion to out_channels.
            # NOTE(review): this 5x5 conv has no `groups=` argument, so it is a
            # dense convolution, whereas the BlazeFace paper describes a
            # depthwise one here. Making it depthwise would change the weight
            # shape and invalidate existing checkpoints, so it is left as-is.
            nn.Conv2d(in_channels=mid_channels, out_channels=mid_channels, kernel_size=5,
                      stride=1, padding=2),
            nn.BatchNorm2d(mid_channels),
            nn.Conv2d(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, stride=1),
            nn.BatchNorm2d(out_channels),
        )

        if self.use_projection:
            self.shortcut = nn.Sequential(
                # ceil_mode=True yields ceil(n / stride) rows/cols, matching
                # the padded strided 5x5 conv for odd input sizes as well.
                # With stride == 1 this pooling layer is a no-op.
                nn.MaxPool2d(kernel_size=stride, stride=stride, ceil_mode=True),
                nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1),
                nn.BatchNorm2d(out_channels),
            )

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``relu(branch1(x) + shortcut(x))`` (identity shortcut if shapes match)."""
        branch1 = self.branch1(x)
        residual = self.shortcut(x) if self.use_projection else x
        return self.relu(branch1 + residual)
class BlazeFace(nn.Module):
    """BlazeFace feature-extraction backbone.

    Only the convolutional trunk is implemented here: ``forward`` returns the
    final feature map and no anchor / classification / regression head is
    attached. The trunk is a stride-2 3x3 stem (3 -> 24 channels), five
    ``BlazeBlock`` layers (24 -> 48 channels, one stride-2 block) and six
    ``DoubleBlazeBlock`` layers (48 -> 96 channels, two stride-2 blocks).
    The four stride-2 layers give an overall down-sampling factor of 16,
    e.g. a ``(N, 3, 128, 128)`` input yields a ``(N, 96, 8, 8)`` output.
    """

    def __init__(self):
        super(BlazeFace, self).__init__()

        # Stem: 3x3 stride-2 conv, halves the input resolution.
        self.firstconv = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=24, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(24),
            nn.ReLU(inplace=True),
        )

        # Single BlazeBlocks; the third one down-samples and widens to 48.
        self.blazeBlock = nn.Sequential(
            BlazeBlock(in_channels=24, out_channels=24),
            BlazeBlock(in_channels=24, out_channels=24),
            BlazeBlock(in_channels=24, out_channels=48, stride=2),
            BlazeBlock(in_channels=48, out_channels=48),
            BlazeBlock(in_channels=48, out_channels=48),
        )

        # Double BlazeBlocks with a 24-channel bottleneck; the first and the
        # fourth down-sample.
        self.doubleBlazeBlock = nn.Sequential(
            DoubleBlazeBlock(in_channels=48, out_channels=96, mid_channels=24, stride=2),
            DoubleBlazeBlock(in_channels=96, out_channels=96, mid_channels=24),
            DoubleBlazeBlock(in_channels=96, out_channels=96, mid_channels=24),
            DoubleBlazeBlock(in_channels=96, out_channels=96, mid_channels=24, stride=2),
            DoubleBlazeBlock(in_channels=96, out_channels=96, mid_channels=24),
            DoubleBlazeBlock(in_channels=96, out_channels=96, mid_channels=24),
        )

        self.initialize()

    def initialize(self):
        """Initialise every Conv2d / BatchNorm2d found in ``self.modules()``.

        Conv weights get Kaiming-normal init and zero bias; batch-norm layers
        get weight 1 and bias 0. Parameters that are absent (``bias=False``
        convolutions, ``affine=False`` batch norms) are skipped instead of
        being passed to ``nn.init`` as ``None``.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                if m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run stem, single BlazeBlocks and double BlazeBlocks; return the feature map."""
        x = self.firstconv(x)
        x = self.blazeBlock(x)
        x = self.doubleBlazeBlock(x)
        return x
if __name__=='__main__':
    # Smoke test: build the network, print its structure, and push one random
    # 128x128 RGB image through it to show the output feature-map shape.
    model = BlazeFace()
    print(model)

    # Named `dummy_input` rather than `input` so the builtin is not shadowed.
    dummy_input = torch.randn(1, 3, 128, 128)
    out = model(dummy_input)
    print(out.shape)