P-net = 全卷积网络
中间层:
卷积层:2D卷积,激活函数:PReLU
池化层:最大池化
置信度输出:Sigmoid(激活函数)
回归框输出,地标点回归:线性输出
P_net | in_shape | in_channels | out_channels | kernel_size | stride | padding | out_shape |
---|---|---|---|---|---|---|---|
conv1 | [batch,3,12,12] | 3 | 10 | 3 | 1 | 0 | [batch,10,10,10] |
pool | [batch,10,10,10] | 10 | 10 | 2 | 2 | 0 | [batch,10,5,5] |
conv2 | [batch,10,5,5] | 10 | 16 | 3 | 1 | 0 | [batch,16,3,3] |
conv3 | [batch,16,3,3] | 16 | 32 | 3 | 1 | 0 | [batch,32,1,1] |
conv4_1 | [batch,32,1,1] | 32 | 1 | 1 | 1 | 0 | [batch,1,1,1] |
conv4_2 | [batch,32,1,1] | 32 | 4 | 1 | 1 | 0 | [batch,4,1,1] |
conv4_3 | [batch,32,1,1] | 32 | 10 | 1 | 1 | 0 | [batch,10,1,1] |
R-net = 卷积层 + 全连接层
中间层:
卷积层:2D卷积,激活函数:PReLU
池化层:最大池化
全连接层:PReLU(激活函数)
置信度输出:Sigmoid(激活函数)
回归框输出,地标点回归:线性输出
conv | in_shape | in_channels | out_channels | kernel_size | stride | padding | out_shape |
---|---|---|---|---|---|---|---|
conv1 | [batch,3,24,24] | 3 | 28 | 3 | 1 | 0 | [batch,28,22,22] |
pool1 | [batch,28,22,22] | 28 | 28 | 3 | 2 | 1 | [batch,28,11,11] |
conv2 | [batch,28,11,11] | 28 | 48 | 3 | 1 | 0 | [batch,48,9,9] |
pool2 | [batch,48,9,9] | 48 | 48 | 3 | 2 | 0 | [batch,48,4,4] |
conv3 | [batch,48,4,4] | 48 | 64 | 2 | 1 | 0 | [batch,64,3,3] |
line | in_unit | out_unit |
---|---|---|
line1 | 64*3*3 | 128 |
line2_1 | 128 | 1 |
line2_2 | 128 | 4 |
line2_3 | 128 | 10 |
O-net = 卷积层 + 全连接层
中间层:
卷积层:2D卷积,激活函数:PReLU
池化层:最大池化
全连接层:PReLU(激活函数)
置信度输出:Sigmoid(激活函数)
回归框输出,地标点回归:线性输出
conv | in_shape | in_channels | out_channels | kernel_size | stride | padding | out_shape |
---|---|---|---|---|---|---|---|
conv1 | [batch,3,48,48] | 3 | 32 | 3 | 1 | 0 | [batch,32,46,46] |
pool1 | [batch,32,46,46] | 32 | 32 | 2 | 2 | 1 | [batch,32,24,24] |
conv2 | [batch,32,24,24] | 32 | 64 | 3 | 1 | 0 | [batch,64,22,22] |
pool2 | [batch,64,22,22] | 64 | 64 | 3 | 2 | 0 | [batch,64,10,10] |
conv3 | [batch,64,10,10] | 64 | 64 | 3 | 1 | 0 | [batch,64,8,8] |
pool3 | [batch,64,8,8] | 64 | 64 | 2 | 2 | 0 | [batch,64,4,4] |
conv4 | [batch,64,4,4] | 64 | 128 | 2 | 1 | 0 | [batch,128,3,3] |
line | in_unit | out_unit |
---|---|---|
line1 | 128*3*3 | 256 |
line2_1 | 256 | 1 |
line2_2 | 256 | 4 |
line2_3 | 256 | 10 |
代码实现:
import torch
import torch.nn as nn
import torch.nn.functional as F
class PNet(nn.Module):
    """Proposal network (P-Net): the fully convolutional first MTCNN stage.

    A 12x12 RGB window is reduced to a 1x1 feature map
    (12 -> 10 -> 5 -> 3 -> 1), so a ``[batch, 3, 12, 12]`` input yields
    ``[batch, C, 1, 1]`` outputs. Because the network contains no linear
    layers, larger inputs produce a spatial grid of predictions instead.
    """

    def __init__(self):
        super().__init__()
        # Shared trunk; the module order fixes the state_dict keys
        # (conv_layer.0, conv_layer.1, ...), so it must not be changed.
        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 10, kernel_size=3, stride=1),   # conv1
            nn.PReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),       # pool1
            nn.Conv2d(10, 16, kernel_size=3, stride=1),  # conv2
            nn.PReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=1),  # conv3
            nn.PReLU(),
        )
        # Three 1x1 convolution heads on top of the shared 32-channel features.
        self.conv4_1 = nn.Conv2d(32, 1, kernel_size=1, stride=1)   # face confidence
        self.conv4_2 = nn.Conv2d(32, 4, kernel_size=1, stride=1)   # box offsets
        self.conv4_3 = nn.Conv2d(32, 10, kernel_size=1, stride=1)  # landmark offsets

    def forward(self, x):
        """Return ``(confidence, box_offset, landmark_offset)`` for ``x``.

        :param x: image batch of shape ``[batch, 3, H, W]`` with H, W >= 12.
        :return: tuple of three tensors with 1, 4 and 10 channels; the
            confidence passes through a sigmoid, the two regression heads
            are linear.
        """
        x = self.conv_layer(x)
        # torch.sigmoid replaces the deprecated F.sigmoid alias.
        cond = torch.sigmoid(self.conv4_1(x))
        box_offset = self.conv4_2(x)
        land_offset = self.conv4_3(x)
        return cond, box_offset, land_offset
class RNet(nn.Module):
    """Refinement network (R-Net): the second MTCNN stage for 24x24 crops.

    The convolutional trunk reduces a ``[batch, 3, 24, 24]`` input to
    ``[batch, 64, 3, 3]`` (24 -> 22 -> 11 -> 9 -> 4 -> 3), which is then
    flattened and passed through one hidden linear layer and three heads.
    """

    def __init__(self):
        super().__init__()
        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 28, kernel_size=3, stride=1),         # conv1
            nn.PReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),  # pool1
            nn.Conv2d(28, 48, kernel_size=3, stride=1),        # conv2
            nn.PReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),             # pool2
            nn.Conv2d(48, 64, kernel_size=2, stride=1),        # conv3
            nn.PReLU(),
        )
        self.line1 = nn.Sequential(
            nn.Linear(64 * 3 * 3, 128),
            nn.PReLU(),
        )
        self.line2_1 = nn.Linear(128, 1)   # face confidence
        self.line2_2 = nn.Linear(128, 4)   # box offsets
        self.line2_3 = nn.Linear(128, 10)  # landmark offsets

    def forward(self, x):
        """Return ``(label, box_offset, land_offset)`` for a batch of crops.

        :param x: tensor of shape ``[batch, 3, 24, 24]``; the linear layer
            requires exactly this spatial size.
        :return: tensors of shape ``[batch, 1]`` (sigmoid confidence),
            ``[batch, 4]`` and ``[batch, 10]`` (linear regressions).
        """
        x = self.conv_layer(x)
        x = x.view(x.size(0), -1)  # flatten to [batch, 64*3*3]
        x = self.line1(x)
        # The heads are the line2_* layers defined in __init__; no conv5_*
        # attributes exist on this module.
        label = torch.sigmoid(self.line2_1(x))
        box_offset = self.line2_2(x)
        land_offset = self.line2_3(x)
        return label, box_offset, land_offset
class ONet(nn.Module):
    """Output network (O-Net): the final MTCNN stage for 48x48 crops.

    The convolutional trunk reduces a ``[batch, 3, 48, 48]`` input to
    ``[batch, 128, 3, 3]`` (48 -> 46 -> 24 -> 22 -> 10 -> 8 -> 4 -> 3),
    which is flattened and passed through one hidden linear layer and
    three heads.
    """

    def __init__(self):
        super().__init__()
        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1),         # conv1
            nn.PReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=1),  # pool1
            nn.Conv2d(32, 64, kernel_size=3, stride=1),        # conv2
            nn.PReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),             # pool2
            nn.Conv2d(64, 64, kernel_size=3, stride=1),        # conv3
            nn.PReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),             # pool3
            nn.Conv2d(64, 128, kernel_size=2, stride=1),       # conv4
            nn.PReLU(),
        )
        self.line1 = nn.Sequential(
            nn.Linear(128 * 3 * 3, 256),
            nn.PReLU(),
        )
        self.line2_1 = nn.Linear(256, 1)   # face confidence
        self.line2_2 = nn.Linear(256, 4)   # box offsets
        self.line2_3 = nn.Linear(256, 10)  # landmark offsets

    def forward(self, x):
        """Return ``(label, box_offset, land_offset)`` for a batch of crops.

        :param x: tensor of shape ``[batch, 3, 48, 48]``; the linear layer
            requires exactly this spatial size.
        :return: tensors of shape ``[batch, 1]`` (sigmoid confidence),
            ``[batch, 4]`` and ``[batch, 10]`` (linear regressions).
        """
        # Use the modules actually created in __init__ (conv_layer / line1);
        # pre_layer, conv5 and prelu5 are not defined on this class.
        x = self.conv_layer(x)
        x = x.view(x.size(0), -1)  # flatten to [batch, 128*3*3]
        x = self.line1(x)
        label = torch.sigmoid(self.line2_1(x))
        box_offset = self.line2_2(x)
        land_offset = self.line2_3(x)
        return label, box_offset, land_offset
import torch
from torch.utils import data
from MTCNN_Pytorch import simpling # 导入数据类
import numpy as np
import os
class Trainer:
    """Train one MTCNN stage network (P-Net, R-Net or O-Net).

    Constructing a ``Trainer`` starts training immediately: ``__init__``
    finishes by calling :meth:`train_net`, which iterates over the dataset
    and saves the whole model to ``save_model_path`` every 100 optimisation
    steps.
    """

    def __init__(self, train_net, batch_size, data_path, save_model_path, lr=0.001, isCuda=True):
        """Set up the model, losses and optimiser, then start training.

        :param train_net: network to train; replaced by the checkpoint at
            ``save_model_path`` when that file already exists.
        :param batch_size: mini-batch size passed to the DataLoader.
        :param data_path: location handed to ``simpling.FaceDataset``.
        :param save_model_path: file the model is loaded from / saved to.
        :param lr: Adam learning rate, default 0.001.
        :param isCuda: request CUDA, default True; falls back to the CPU
            when CUDA is not available.
        """
        self.model = train_net
        self.data_path = data_path
        self.batch_size = batch_size
        self.lr = lr
        # Avoid crashing on machines without a GPU when the default is used.
        self.isCuda = bool(isCuda) and torch.cuda.is_available()
        self.save_path = save_model_path

        if os.path.exists(self.save_path):  # resume from an existing checkpoint
            # NOTE: torch.load unpickles a full module object; only load
            # checkpoint files from a trusted source.
            self.model = torch.load(
                self.save_path,
                map_location='cuda' if self.isCuda else 'cpu',
            )
        if self.isCuda:
            self.model.cuda()

        self.face_loss = torch.nn.BCELoss()
        self.offset_loss = torch.nn.MSELoss()
        self.opt = torch.optim.Adam(params=self.model.parameters(), lr=self.lr)
        self.train_net()  # training starts as part of construction

    def train_net(self):
        """Run the training loop: 10000 passes over the dataset.

        Each batch is expected to unpack into
        ``(img_data, label, box_offset, land_offset)``. Every 100 steps the
        current loss is printed and the whole model is saved.
        """
        step = 1  # counts optimisation steps (printed under the 'Epoch:' tag)
        dataset = simpling.FaceDataset(self.data_path)
        # One loader is enough: with shuffle=True it reshuffles on every pass.
        loader = data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True, num_workers=4)
        device = torch.device('cuda' if self.isCuda else 'cpu')

        for _ in range(10000):
            for img_data, label, box_offset, land_offset in loader:
                img_data = img_data.to(device)
                face_out, box_offset_out, land_offset_out = self.model(img_data)

                loss = self._compute_loss(
                    face_out, box_offset_out, land_offset_out,
                    label, box_offset, land_offset,
                )
                if loss is None:  # batch contributed to neither loss term
                    continue
                self.loss = loss

                self.opt.zero_grad()
                self.loss.backward()
                self.opt.step()

                step += 1
                if step % 100 == 0:
                    print('Epoch:', step, ' Loss:', self.loss.cpu().item())
                    torch.save(self.model, self.save_path)

    def _compute_loss(self, face_out, box_offset_out, land_offset_out, label, box_offset, land_offset):
        """Combine face, box and landmark losses for one batch.

        Samples whose label is not 2 feed the face (BCE) loss; samples whose
        label is not 0 feed both regression (MSE) losses. Returns ``None``
        when no sample qualifies for any term.
        """
        batch = face_out.size(0)
        device = face_out.device
        # Flatten trailing 1x1 dims explicitly; a bare squeeze() would also
        # drop the batch dimension when the batch holds a single sample.
        face_out = face_out.reshape(batch, -1)
        box_offset_out = box_offset_out.reshape(batch, -1)
        land_offset_out = land_offset_out.reshape(batch, -1)
        label = label.reshape(-1).to(device)
        box_offset = box_offset.reshape(batch, -1).to(device=device, dtype=box_offset_out.dtype)
        land_offset = land_offset.reshape(batch, -1).to(device=device, dtype=land_offset_out.dtype)

        face_mask = torch.ne(label, 2)
        offset_mask = torch.ne(label, 0)

        terms = []
        if bool(face_mask.any()):
            face_pred = face_out[face_mask]
            positive = label[face_mask].to(face_pred.dtype)
            if face_pred.size(1) == 2:
                # Two-channel head: one-hot target [not-face, face].
                target = torch.stack([1 - positive, positive], dim=1)
            else:
                # Single sigmoid channel: the target is the 0/1 label itself.
                target = positive.view(-1, 1)
            terms.append(self.face_loss(face_pred, target))
        if bool(offset_mask.any()):
            terms.append(self.offset_loss(box_offset_out[offset_mask], box_offset[offset_mask]))
            terms.append(self.offset_loss(land_offset_out[offset_mask], land_offset[offset_mask]))

        if not terms:
            return None
        total = terms[0]
        for term in terms[1:]:
            total = total + term
        return total

    def one_hot(self, data):
        """Return a length-2 one-hot numpy vector for class index ``data``.

        :param data: class index, 0 or 1.
        :return: ``numpy.ndarray`` of shape ``[2]`` with a 1 at ``data``.
        """
        hot = np.zeros([2])
        hot[data] = 1
        return hot
if __name__ == '__main__':
    # Running this module directly is a no-op. The commented lines below are
    # earlier example invocations, kept for reference only:
    # p_train = Trainer(Pnet, 512,r"C:\Users\Administrator\Desktop\Celeba数据集\Celeba4\12", r'.\log_P_train.pt')
    # r_train = Trainer(net.Rnet(), 512, r"C:\Users\Administrator\Desktop\Celeba数据集\Celeba4\24", r'.\log_R_train.pt')
    # o_train = Trainer(net.Onet(), 512, r"C:\Users\Administrator\Desktop\Celeba数据集\Celeba4\48", r'.\log_O_train.pt')
    pass
from MTCNN_Pytorch import train,nets
if __name__ == '__main__':
    # Train P-Net on the 12x12 samples. The imported module is `nets`
    # (not `net`); the class name is assumed to be `PNet` -- confirm against nets.py.
    train.Trainer(nets.PNet(), 512, r"C:\Users\Administrator\Desktop\Celeba数据集\Celeba4\12", r'.\log_P_trian')
from MTCNN_Pytorch import train,nets
if __name__ == '__main__':
    # Train R-Net on the 24x24 samples. The imported module is `nets`
    # (not `net`); the class name is assumed to be `RNet` -- confirm against nets.py.
    train.Trainer(nets.RNet(), 512, r"C:\Users\Administrator\Desktop\Celeba数据集\Celeba4\24", r'.\log_R_trian')
from MTCNN_Pytorch import train,nets
if __name__ == '__main__':
    # Train O-Net on the 48x48 samples. The imported module is `nets`
    # (not `net`); the class name is assumed to be `ONet` -- confirm against nets.py.
    train.Trainer(nets.ONet(), 512, r"C:\Users\Administrator\Desktop\train_data\48/", r'.\log_O_trian')
import torch
from MTCNN_Pytorch import nets, util
from PIL import Image, ImageDraw
import numpy as np
import os
# Only a single image can be scanned per instance.
class Test():
    """Run a P-Net over an image pyramid of one image and draw the result.

    Construction does all the work: the image is loaded, the network is run
    on successively smaller copies of it, the candidate boxes are filtered
    and the annotated image is shown.
    """

    FACE_THRESHOLD = 0.5   # cut-off for a single-channel sigmoid confidence
    WINDOW_SIDE = 12       # side of the P-Net input window, in pixels
    STRIDE = 2             # pixels between neighbouring output cells
    PYRAMID_FACTOR = 0.7   # shrink factor between pyramid levels

    def __init__(self, photo, net,
                 model_path=r'C:\Users\Administrator\Desktop\myproject\MTCNN\log_P_trian'):
        """Load the image and model, then run detection.

        :param photo: path to an image file, or an already opened PIL image.
        :param net: network to use when no checkpoint exists at ``model_path``.
        :param model_path: saved model file; when present it replaces ``net``.
        """
        self.photo = photo
        self.img, self.img_data, self.img_x, self.img_y = self.get_img_data(photo)
        self.model = net
        print(self.model)
        if os.path.exists(model_path):
            # NOTE: torch.load unpickles a full module object; only load
            # trusted files. map_location keeps the model on the CPU, which
            # the numpy conversions in this class require.
            self.model = torch.load(model_path, map_location='cpu')
        self.get_net_out()

    def get_img_data(self, image):
        """Convert an image into a normalised network input.

        :param image: file path or PIL image.
        :return: ``(img, img_data, img_x, img_y)`` where ``img`` is the RGB
            PIL image, ``img_data`` a ``[1, 3, H, W]`` float tensor scaled
            to ``[-0.5, 0.5]``, and ``img_x`` / ``img_y`` its width / height.
        :raises FileNotFoundError: if ``image`` is a path that does not exist.
        """
        if isinstance(image, str):
            if not os.path.exists(image):
                raise FileNotFoundError(image)
            img = Image.open(image)
        else:
            img = image
        if img.mode != 'RGB':
            img = img.convert('RGB')  # the network expects exactly 3 channels
        img_x, img_y = img.size
        img_data = torch.Tensor(np.array(img))   # [H, W, 3]
        img_data = img_data / 255 - 0.5
        img_data = img_data.unsqueeze(0)         # add the batch dimension
        return img, img_data.permute(0, 3, 1, 2), img_x, img_y

    def get_net_out(self):
        """Scan the image pyramid, filter the candidates and show the image."""
        full_w, full_h = self.img.size
        candidates = []
        while self.img_size(self.img_x, self.img_y):
            with torch.no_grad():
                outputs = self.model(self.img_data)
            # Works for nets returning (confidence, box) as well as
            # (confidence, box, landmarks).
            face_out, offset = outputs[0], outputs[1]

            rows = self.add_index(offset)                        # [H, W, 6]
            rows = rows.reshape(-1, rows.size(2)).numpy().copy()  # [H*W, 6]

            if face_out.size(1) == 1:
                is_face = face_out[0, 0] > self.FACE_THRESHOLD
            else:
                is_face = torch.argmax(face_out[0], 0) != 0
            rows = rows[is_face.reshape(-1).numpy()]

            if len(rows):
                # Dividing offsets and cell indices by the level's scale makes
                # the decode in filter() yield original-image coordinates.
                scale_x = self.img_x / full_w
                scale_y = self.img_y / full_h
                rows[:, [0, 2, 5]] /= scale_x
                rows[:, [1, 3, 4]] /= scale_y
                candidates.append(rows)

            # Next pyramid level: shrink the original image again.
            self.img_x = int(self.img_x * self.PYRAMID_FACTOR)
            self.img_y = int(self.img_y * self.PYRAMID_FACTOR)
            if not self.img_size(self.img_x, self.img_y):
                break
            smaller = self.img.resize((self.img_x, self.img_y))
            self.img_data = self.get_img_data(smaller)[1]

        if candidates:
            self.filter(np.concatenate(candidates, axis=0))
        self.img.show()

    def filter(self, offset):
        """Decode candidate rows into boxes, run NMS and draw the largest box.

        :param offset: ``[N, 6]`` tensor or array; columns 0-3 are the box
            offsets, column 4 the output row index and column 5 the output
            column index, as produced by :meth:`add_index`.

        NOTE(review): the decode assumes all four offsets are expressed in
        units of the window side relative to the window's top-left corner --
        confirm against the sample-generation code.
        """
        if isinstance(offset, torch.Tensor):
            offset = offset.detach().cpu().numpy()
        rows = np.asarray(offset, dtype=np.float32).reshape(-1, 6)
        if rows.shape[0] == 0:
            return

        x = rows[:, 5] * self.STRIDE  # column index -> horizontal position
        y = rows[:, 4] * self.STRIDE  # row index -> vertical position
        scaled = rows[:, :4] * self.WINDOW_SIDE
        # One row per box: [x_left, y_top, x_right, y_bottom].
        box = np.stack(
            [scaled[:, 0] + x, scaled[:, 1] + y, scaled[:, 2] + x, scaled[:, 3] + y],
            axis=1,
        )

        area = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1])
        box_max = box[np.argmax(area)]
        # NOTE(review): the IoU against the largest box is used as the fifth
        # column handed to util.nms -- confirm that is the score it expects.
        area_iou = np.asarray(util.iou(box_max, box)).reshape((area.shape[0], 1))
        box_iou = np.hstack((box, area_iou))
        kept = np.asarray(util.nms(box_iou))
        if kept.size == 0:
            return
        kept = kept.reshape(-1, kept.shape[-1])

        kept_area = (kept[:, 2] - kept[:, 0]) * (kept[:, 3] - kept[:, 1])
        best = kept[np.argmax(kept_area)]
        draw = ImageDraw.Draw(self.img)
        draw.rectangle(
            (float(best[0]), float(best[1]), float(best[2]), float(best[3])),
            width=3,
        )

    def img_size(self, img_x, img_y):
        """Return True when a 12x12 window still fits into the image."""
        return img_x >= self.WINDOW_SIDE and img_y >= self.WINDOW_SIDE

    def add_index(self, offset):
        """Append each output cell's (row, column) index to its offsets.

        :param offset: ``[1, C, H, W]`` tensor (only the first batch entry
            is used).
        :return: ``[H, W, C + 2]`` tensor whose last two channels are the
            row index and the column index of the cell.
        """
        # permute moves the channel axis last; view would only reinterpret
        # the memory layout and mix values from different channels.
        grid = offset[0].detach().permute(1, 2, 0)
        rows, cols = grid.size(0), grid.size(1)
        row_idx = torch.arange(rows, dtype=grid.dtype, device=grid.device)
        col_idx = torch.arange(cols, dtype=grid.dtype, device=grid.device)
        row_idx = row_idx.view(rows, 1, 1).expand(rows, cols, 1)
        col_idx = col_idx.view(1, cols, 1).expand(rows, cols, 1)
        return torch.cat([grid, row_idx, col_idx], dim=2)
#########################
if __name__ == '__main__':
    # The imported module is `nets` (not `net`); the class name is assumed
    # to be `PNet` -- confirm against nets.py.
    Test(r'test_photo.jpg', nets.PNet())