Project repository: https://github.com/lihuaqiang0101/MTCNN-Face-Detection-System
import torch
from Net import Pnet,Rnet,Onet
import numpy as np
from torchvision import transforms
# Build an MTCNN class used for inference
class MTCNN:
    # Pass in the pretrained weight paths when instantiating
    def __init__(self, pnet_params='', rnet_params='', onet_params=''):
        self.pnet = Pnet()
        self.rnet = Rnet()
        self.onet = Onet()
        if torch.cuda.is_available():
            self.pnet = self.pnet.cuda()
            self.rnet = self.rnet.cuda()
            self.onet = self.onet.cuda()
        self.pnet.load_state_dict(torch.load(pnet_params))
        self.rnet.load_state_dict(torch.load(rnet_params))
        self.onet.load_state_dict(torch.load(onet_params))
        # switch the three networks to eval mode for inference
        self.pnet.eval()
        self.rnet.eval()
        self.onet.eval()
        self.To_Tensor = transforms.Compose([
            transforms.ToTensor()
        ])
    # Run the cascade: P-Net -> R-Net -> O-Net
    def detect(self, img):
        pnet_boxs = self.pnet_detect(img)
        if pnet_boxs.shape[0] == 0:
            return np.array([])
        rnet_boxs = self.rnet_detect(img, pnet_boxs)
        if rnet_boxs.shape[0] == 0:
            return np.array([])
        onet_boxs = self.onet_detect(img, rnet_boxs)
        if onet_boxs.shape[0] == 0:
            return np.array([])
        return onet_boxs
    # P-Net: scan an image pyramid and propose candidate boxes
    def pnet_detect(self, img):
        scal = 1
        bboxs = []
        image = img
        w, h = img.size
        side = min(w, h)
        while side > 12:
            imgdata = self.To_Tensor(image)
            # add a batch dimension: (C, H, W) -> (1, C, H, W)
            imgdata = imgdata.unsqueeze(0)
            if torch.cuda.is_available():
                imgdata = imgdata.cuda()
            conf, off = self.pnet(imgdata)
            confidence, offset = conf.cpu()[0][0].data, off[0].cpu().data
            # keep feature-map positions whose confidence exceeds 0.6
            indexs = torch.nonzero(torch.gt(confidence, 0.6))
            for index in indexs:
                bboxs.append(self.box(index, offset, confidence[index[0], index[1]], scal))
            # shrink the image by a factor of 0.7 each step to build the pyramid
            scal *= 0.7
            _w = int(w * scal)
            _h = int(h * scal)
            image = image.resize((_w, _h))
            side = min(_w, _h)
        return self.nms(np.array(bboxs), 0.5)
    # R-Net: refine the P-Net candidates on 24x24 crops
    def rnet_detect(self, img, boxs):
        img_datasets = []
        # convert the candidate boxes to squares
        square_boxs = self.convert_to_square(boxs)
        for box in square_boxs:
            _x1 = int(box[0])
            _y1 = int(box[1])
            _x2 = int(box[2])
            _y2 = int(box[3])
            # crop the box region out of the original image
            image = img.crop((_x1, _y1, _x2, _y2))
            image = image.resize((24, 24))
            imgdata = self.To_Tensor(image)
            img_datasets.append(imgdata)
        img_datasets = torch.stack(img_datasets)
        if torch.cuda.is_available():
            img_datasets = img_datasets.cuda()
        conf, off = self.rnet(img_datasets)
        confidence, offset = conf.cpu().data.numpy(), off.cpu().data.numpy()
        boxes = []
        indexs, _ = np.where(confidence > 0.7)
        for index in indexs:
            _box = square_boxs[index]
            _x1 = _box[0]
            _y1 = _box[1]
            _x2 = _box[2]
            _y2 = _box[3]
            w = _x2 - _x1
            h = _y2 - _y1
            # map the regression offsets back to coordinates in the original image
            x1 = _x1 + w * offset[index][0]
            y1 = _y1 + h * offset[index][1]
            x2 = _x2 + w * offset[index][2]
            y2 = _y2 + h * offset[index][3]
            boxes.append([x1, y1, x2, y2, confidence[index][0]])
        return self.nms(np.array(boxes), 0.5)
    # O-Net: final refinement on 48x48 crops with a stricter threshold
    def onet_detect(self, img, boxs):
        imgdatasets = []
        square_boxes = self.convert_to_square(boxs)
        for box in square_boxes:
            _x1 = int(box[0])
            _y1 = int(box[1])
            _x2 = int(box[2])
            _y2 = int(box[3])
            image = img.crop((_x1, _y1, _x2, _y2))
            image = image.resize((48, 48))
            imgdata = self.To_Tensor(image)
            imgdatasets.append(imgdata)
        imgdatasets = torch.stack(imgdatasets)
        if torch.cuda.is_available():
            imgdatasets = imgdatasets.cuda()
        conf, off = self.onet(imgdatasets)
        confidence, offset = conf.cpu().data.numpy(), off.cpu().data.numpy()
        boxes = []
        indexs, _ = np.where(confidence > 0.97)
        for index in indexs:
            box = square_boxes[index]
            _x1 = int(box[0])
            _y1 = int(box[1])
            _x2 = int(box[2])
            _y2 = int(box[3])
            w = _x2 - _x1
            h = _y2 - _y1
            x1 = _x1 + offset[index][0] * w
            y1 = _y1 + offset[index][1] * h
            x2 = _x2 + offset[index][2] * w
            y2 = _y2 + offset[index][3] * h
            boxes.append([x1, y1, x2, y2, confidence[index][0]])
        # use min-area IoU in the final NMS to suppress nested boxes
        return self.nms(np.array(boxes), 0.7, isMin=True)
    # Map a P-Net feature-map position back to a box in the original image
    def box(self, start_index, offset, conf, scal, stride=2, side_len=12):
        _x1 = (start_index[1] * stride) / scal
        _y1 = (start_index[0] * stride) / scal
        _x2 = (start_index[1] * stride + side_len) / scal
        _y2 = (start_index[0] * stride + side_len) / scal
        w = _x2 - _x1
        h = _y2 - _y1
        _offset = offset[:, start_index[0], start_index[1]]
        x1 = _x1 + _offset[0] * w
        y1 = _y1 + _offset[1] * h
        x2 = _x2 + _offset[2] * w
        y2 = _y2 + _offset[3] * h
        return [x1, y1, x2, y2, conf]
    # Non-maximum suppression: keep the highest-confidence box and drop heavy overlaps
    def nms(self, boxs, thresh=0.3, isMin=False):
        if boxs.shape[0] == 0:
            return np.array([])
        # sort boxes by confidence in descending order
        _boxs = boxs[(-boxs[:, 4]).argsort()]
        bboxs = []
        while _boxs.shape[0] > 1:
            a_box = _boxs[0]
            b_boxs = _boxs[1:]
            bboxs.append(a_box)
            index = np.where(self.iou(a_box, b_boxs, isMin) < thresh)
            _boxs = b_boxs[index]
        if _boxs.shape[0] > 0:
            bboxs.append(_boxs[0])
        return np.stack(bboxs)
    # IoU between one box and an array of boxes; isMin divides by the smaller area instead of the union
    def iou(self, box, boxs, isMin=False):
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        area = (boxs[:, 2] - boxs[:, 0]) * (boxs[:, 3] - boxs[:, 1])
        x1 = np.maximum(box[0], boxs[:, 0])
        y1 = np.maximum(box[1], boxs[:, 1])
        x2 = np.minimum(box[2], boxs[:, 2])
        y2 = np.minimum(box[3], boxs[:, 3])
        w = np.maximum(0, x2 - x1)
        h = np.maximum(0, y2 - y1)
        inter = w * h
        if isMin:
            ovr = np.true_divide(inter, np.minimum(box_area, area))
        else:
            ovr = np.true_divide(inter, (box_area + area - inter))
        return ovr
    # Expand each box into a square around its center
    def convert_to_square(self, bbox):
        square_bbox = bbox.copy()
        if bbox.shape[0] == 0:
            return np.array([])
        w = bbox[:, 2] - bbox[:, 0]
        h = bbox[:, 3] - bbox[:, 1]
        max_side = np.maximum(w, h)
        # shift the top-left corner so the square stays centered on the original box
        square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - max_side * 0.5
        square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - max_side * 0.5
        square_bbox[:, 2] = square_bbox[:, 0] + max_side
        square_bbox[:, 3] = square_bbox[:, 1] + max_side
        return square_bbox
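
A minimal usage sketch of the class above (the weight paths and the test image name are placeholders, not files shipped with the repository):

from PIL import Image, ImageDraw

if __name__ == '__main__':
    # hypothetical weight paths; substitute your own trained checkpoints
    detector = MTCNN(pnet_params='pnet.pth', rnet_params='rnet.pth', onet_params='onet.pth')
    with Image.open('test.jpg') as img:  # hypothetical test image
        boxes = detector.detect(img)
        draw = ImageDraw.Draw(img)
        for x1, y1, x2, y2, conf in boxes:
            # draw each detected face box on the image
            draw.rectangle((x1, y1, x2, y2), outline='red', width=2)
        img.show()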
Result: