全称为Proposal Network,其基本的构造是一个全卷积网络(FCN)。对上一步构建完成的图像金字塔,通过该FCN进行初步特征提取与标定边框,并进行Bounding-Box Regression调整窗口与NMS进行大部分窗口的过滤。
PNet是一个人脸区域的区域建议网络。该网络将输入特征经过三个卷积层之后,再分别通过3种不同的1 * 1卷积,得到三种不同的tensor,分别对应如下:
1 * 1 * 2 :这个用于人脸分类,但是与论文不同,实际实现输出的是 1 * 1 * 1 的tensor,只有一个channel,代码中当这个channel大于0.6的时候则认为是人脸。
1 * 1 * 4 :这个用于人脸框的标记,由4个channel(特征)组成,(x1,y1) (x2,y2) 代表方框的左上、右下两个点的坐标。
1 * 1 * 10 :这个是人脸轮廓(关键点),由10个channel组成,PNet的输出并不需要。
PNet的输入是一个12 * 12 * 3 的图片。输出是上面的1和2两种tensor,人脸轮廓PNet无需得到,交给后面的
RNet即可。
#torch package
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import sys
sys.path.append('../')
# add other package
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from tool.plotcm import plot_confusion_matrix
import tool.image_tools
import pdb
from collections import OrderedDict
from collections import namedtuple
from itertools import product
#torch.set_printoptions(linewidth=120)
def weights_init(m):
    """Initialise a single submodule: Xavier-uniform weights, constant 0.1 bias.

    Intended to be passed to ``nn.Module.apply`` so it is invoked on every
    submodule of a network; modules other than Conv2d/Linear are left
    untouched.

    Args:
        m: the submodule currently being visited by ``apply``.
    """
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.xavier_uniform_(m.weight)
        # Layers built with bias=False have m.bias is None, which would make
        # nn.init.constant_ raise — guard against that.
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.1)
由说明可知,PNet需要三个卷积网络,并且输出都要进行非线性处理。
#input 12*12*3
class PNet(nn.Module):
    """Proposal network of MTCNN.

    Maps a 12x12 RGB patch to a face probability (1 channel, sigmoid) and
    four bounding-box regression offsets. A 10-channel landmark head is
    declared for completeness but is not returned by ``forward`` — landmark
    prediction is left to the later networks.
    """

    def __init__(self):
        super().__init__()
        # Backbone: three 3x3 convolutions.
        # Spatial sizes for a 12x12 input: 12 -> 10 -> (pool) 5 -> 3 -> 1.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=16, kernel_size=3)
        self.out = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        # 1x1 heads on the shared 32-channel feature map.
        self.det = nn.Conv2d(in_channels=32, out_channels=1, kernel_size=1)
        self.bound = nn.Conv2d(in_channels=32, out_channels=4, kernel_size=1)
        self.landmark = nn.Conv2d(in_channels=32, out_channels=10, kernel_size=1)
        # Xavier weights / 0.1 biases on every conv layer.
        self.apply(weights_init)

    def forward(self, tensor):
        """Return ``(label, offset)`` for a batch of 12x12x3 patches.

        label  -- face probability in (0, 1), shape (N, 1, 1, 1)
        offset -- non-negative box offsets,   shape (N, 4, 1, 1)
        """
        features = F.relu(self.conv1(tensor))
        features = F.max_pool2d(features, kernel_size=2, stride=2)
        features = F.relu(self.conv2(features))
        # No activation on the last backbone layer before the heads.
        features = self.out(features)
        # Face / non-face score squashed to a probability.
        label = torch.sigmoid(self.det(features))
        # Bounding-box regression offsets.
        offset = F.relu(self.bound(features))
        # The landmark head is intentionally not evaluated here.
        return label, offset
if __name__ == '__main__':
    # Smoke test: push a random batch of four 12x12 RGB patches through PNet
    # and mimic the >=0 masking later used by the classification loss.
    batch = torch.rand([4, 3, 12, 12])
    label = torch.randn([4])
    print(label)
    print(batch.shape)
    net = PNet()
    plabel, offset = net(batch)
    plabel = plabel.squeeze()
    # Sigmoid outputs are strictly positive, so this mask keeps every entry.
    mask = plabel >= 0
    valid_gt_cls = plabel[mask]
    prob_ones = valid_gt_cls >= 0.2
    print('b', plabel)
    print('a:', mask)
    print('c', valid_gt_cls)
    print('c', prob_ones)
tensor([ 0.7816, 1.7310, 0.3439, -0.6427])
torch.Size([4, 3, 12, 12])
b tensor([0.5679, 0.5057, 0.6006, 0.5361], grad_fn=&lt;SqueezeBackward0&gt;)
a: tensor([True, True, True, True])
c tensor([0.5679, 0.5057, 0.6006, 0.5361], grad_fn=&lt;MaskedSelectBackward0&gt;)
c tensor([True, True, True, True])
损失函数的设计关系到神经网络是否准确,由于PNet得到1和2两个不同类型的tensor,因此损失计算要用两种不同的偏差,并且按照一定的权重相加得到综合的一个评估,通过综合结果来计算梯度,更新权重值。
#torch package
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import sys
sys.path.append('../')
# add other package
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from tool.plotcm import plot_confusion_matrix
import tool.image_tools
import pdb
from collections import OrderedDict
from collections import namedtuple
from itertools import product
class LossFn:
def __init__(self, cls_factor=1, box_factor=1, landmark_factor=1):
# loss function
self.cls_factor = cls_factor
self.box_factor = box_factor
self.land_factor = landmark_factor
self.loss_cls = nn.BCELoss()
# binary cross entropy
self.loss_box = nn.MSELoss()
# mean square error
self.loss_landmark = nn.MSELoss()
def cls_loss(self,gt_label,pred_label):
pred_label = torch.squeeze(pred_label)
gt_label = torch.squeeze(gt_label)
# get the mask element which >= 0, only 0 and 1 can effect the detection loss
mask = torch.ge(gt_label,0)
valid_gt_label = torch.masked_select(gt_label,mask)
valid_pred_label = torch.masked_select(pred_label,mask)
return self.loss_cls(valid_pred_label,valid_gt_label)*self.cls_factor
def box_loss(self,gt_label,gt_offset,pred_offset):
pred_offset = torch.squeeze(pred_offset)
gt_offset = torch.squeeze(gt_offset)
gt_label = torch.squeeze(gt_label)
#get the mask element which != 0
unmask = torch.eq(gt_label,0)
mask = torch.eq(unmask,0)
#convert mask to dim index
chose_index = torch.nonzero(mask.data)
chose_index = torch.squeeze(chose_index)
#only valid element can effect the loss
valid_gt_offset = gt_offset[chose_index,:]
valid_pred_offset = pred_offset[chose_index,:]
return self.loss_box(valid_pred_offset,valid_gt_offset)*self.box_factor