MTCNN完整流程

项目地址:https://github.com/lihuaqiang0101/MTCNN-Face-Detection-System
import torch
from Net import Pnet,Rnet,Onet
import numpy as np
from torchvision import transforms

# Build an MTCNN class used for testing (inference)
class MTCNN:
    """Three-stage (P-Net -> R-Net -> O-Net) cascade used for inference.

    Boxes are passed between stages as ``numpy`` arrays with one row per box
    laid out as ``[x1, y1, x2, y2, confidence]``. Every stage returns an empty
    array (``np.array([])``) when nothing survives its threshold.

    NOTE(review): ``Pnet``/``Rnet``/``Onet`` are defined in ``Net`` and are not
    visible here. This class assumes each network returns a
    ``(confidence, offset)`` pair, that P-Net output is batched as
    ``(N, 1, H, W)`` / ``(N, 4, H, W)``, and that R-Net/O-Net confidence is
    2-D ``(N, 1)`` with offsets ``(N, 4)`` -- confirm against ``Net``.
    """

    # Minimum confidence a candidate needs to survive each stage.
    PNET_CONF_THRESH = 0.6
    RNET_CONF_THRESH = 0.7
    ONET_CONF_THRESH = 0.97
    # Factor by which the image is shrunk between P-Net pyramid levels.
    PYRAMID_FACTOR = 0.7

    def __init__(self,pnet_params='',rnet_params='',onet_params=''):
        """Build the three networks and load their pre-trained weights.

        Args:
            pnet_params: path to the saved P-Net ``state_dict``.
            rnet_params: path to the saved R-Net ``state_dict``.
            onet_params: path to the saved O-Net ``state_dict``.
        """
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.pnet = Pnet()
        self.rnet = Rnet()
        self.onet = Onet()
        for net, params in ((self.pnet, pnet_params),
                            (self.rnet, rnet_params),
                            (self.onet, onet_params)):
            # map_location lets weights that were saved on a GPU be loaded on
            # a CPU-only machine instead of failing inside torch.load.
            net.load_state_dict(torch.load(params, map_location=device))
            net.to(device)
            # This class only runs inference: switch layers such as
            # dropout / batch-norm (if the nets have any) to eval behaviour.
            net.eval()
        self.To_Tensor = transforms.Compose([
            transforms.ToTensor()
        ])

    @staticmethod
    def _to_device(tensor):
        """Move ``tensor`` to the GPU when one is available, else return it as is."""
        return tensor.cuda() if torch.cuda.is_available() else tensor

    def detect(self,img):
        """Run the full cascade on ``img`` and return the final boxes.

        Args:
            img: image object exposing ``size``, ``resize`` and ``crop``
                (presumably a ``PIL.Image`` -- confirm with the caller).

        Returns:
            ``(N, 5)`` array of ``[x1, y1, x2, y2, confidence]`` rows, or an
            empty array when any stage rejects every candidate.
        """
        pnet_boxs = self.pnet_detect(img)
        if pnet_boxs.shape[0] == 0:
            return np.array([])
        # Must go through rnet_detect (crop + refine + NMS); calling the raw
        # network ``self.rnet`` with (img, boxes) is not a valid forward call.
        rnet_boxs = self.rnet_detect(img,pnet_boxs)
        if rnet_boxs.shape[0] == 0:
            return np.array([])
        onet_boxs = self.onet_detect(img,rnet_boxs)
        if onet_boxs.shape[0] == 0:
            return np.array([])
        return onet_boxs

    def pnet_detect(self,img):
        """Propose candidate boxes by running P-Net over an image pyramid.

        The image is repeatedly shrunk by ``PYRAMID_FACTOR`` until its shorter
        side is no longer greater than 12 pixels. Every output cell whose
        confidence exceeds ``PNET_CONF_THRESH`` is mapped back to
        original-image coordinates by :meth:`box`.

        Returns:
            Candidates after NMS (IoU threshold 0.5), or an empty array.
        """
        scal = 1
        bboxs = []
        image = img
        w,h = img.size
        side = min(w,h)
        while side > 12:
            # To_Tensor yields (C, H, W); add the batch dimension so the
            # outputs can be indexed as conf[0][0] / off[0] below.
            imgdata = self._to_device(self.To_Tensor(image).unsqueeze(0))
            with torch.no_grad():
                conf,off = self.pnet(imgdata)
            confidence = conf[0][0].detach().cpu()
            offset = off[0].detach().cpu()
            for index in torch.nonzero(confidence > self.PNET_CONF_THRESH):
                bboxs.append(self.box(index,offset,confidence[index[0],index[1]],scal))
            scal *= self.PYRAMID_FACTOR
            _w = int(w*scal)
            _h = int(h*scal)
            image = image.resize((_w,_h))
            side = min(_w,_h)
        return self.nms(np.array(bboxs),0.5)

    def _crop_batch(self,img,square_boxs,size):
        """Crop every square box out of ``img``, resize to ``size`` x ``size`` and stack."""
        crops = []
        for box in square_boxs:
            region = (int(box[0]),int(box[1]),int(box[2]),int(box[3]))
            crops.append(self.To_Tensor(img.crop(region).resize((size,size))))
        return self._to_device(torch.stack(crops))

    @staticmethod
    def _forward(net,batch):
        """Run ``net`` on ``batch`` without autograd; return (confidence, offset) as numpy arrays."""
        with torch.no_grad():
            conf,off = net(batch)
        return conf.detach().cpu().numpy(),off.detach().cpu().numpy()

    @staticmethod
    def _refine(square_boxs,confidence,offset,thresh,snap_to_pixels=False):
        """Keep boxes with confidence above ``thresh`` and shift their corners by the predicted offsets.

        Offsets are fractions of the square box's width/height. With
        ``snap_to_pixels`` the reference corners are truncated to integers
        first (the same truncation that is applied when cropping).
        """
        boxes = []
        indexs,_ = np.where(confidence > thresh)
        for index in indexs:
            _x1,_y1,_x2,_y2 = square_boxs[index][:4]
            if snap_to_pixels:
                _x1,_y1,_x2,_y2 = int(_x1),int(_y1),int(_x2),int(_y2)
            w = _x2 - _x1
            h = _y2 - _y1
            boxes.append([
                _x1 + w * offset[index][0],
                _y1 + h * offset[index][1],
                _x2 + w * offset[index][2],
                _y2 + h * offset[index][3],
                confidence[index][0],
            ])
        return np.array(boxes)

    def rnet_detect(self,img,boxs):
        """Refine P-Net candidates with R-Net on 24x24 crops.

        Args:
            img: the original image.
            boxs: non-empty ``(N, 5)`` array of candidates.

        Returns:
            Boxes with confidence above ``RNET_CONF_THRESH`` after offset
            refinement and NMS (IoU threshold 0.5), or an empty array.
        """
        # Convert the candidates to squares before cropping.
        square_boxs = self.convert_to_square(boxs)
        # Crop each square out of the original image.
        img_datasets = self._crop_batch(img,square_boxs,24)
        confidence,offset = self._forward(self.rnet,img_datasets)
        boxes = self._refine(square_boxs,confidence,offset,self.RNET_CONF_THRESH)
        return self.nms(boxes,0.5)

    def onet_detect(self,img,boxs):
        """Produce the final boxes with O-Net on 48x48 crops.

        Args:
            img: the original image.
            boxs: non-empty ``(N, 5)`` array of candidates.

        Returns:
            Boxes with confidence above ``ONET_CONF_THRESH`` after offset
            refinement and NMS (threshold 0.7, overlap measured against the
            smaller box), or an empty array.
        """
        square_boxes = self.convert_to_square(boxs)
        imgdatasets = self._crop_batch(img,square_boxes,48)
        confidence,offset = self._forward(self.onet,imgdatasets)
        boxes = self._refine(square_boxes,confidence,offset,
                             self.ONET_CONF_THRESH,snap_to_pixels=True)
        return self.nms(boxes,0.7,isMin=True)

    def box(self,start_index,offset,conf,scal,stride=2,side_len=12):
        """Map one P-Net output cell back to a box in original-image coordinates.

        Args:
            start_index: ``(row, col)`` of the cell in the confidence map.
            offset: offset map indexed as ``offset[:, row, col]`` (4 values).
            conf: confidence of the cell.
            scal: scale of the pyramid level relative to the original image.
            stride: step, in input pixels, between neighbouring output cells.
            side_len: side of the input window that one output cell covers.

        Returns:
            ``[x1, y1, x2, y2, conf]`` as plain Python floats.
        """
        row = int(start_index[0])
        col = int(start_index[1])
        _x1 = (col*stride)/scal
        _y1 = (row*stride)/scal
        _x2 = (col*stride+side_len)/scal
        _y2 = (row*stride+side_len)/scal
        w = _x2 - _x1
        h = _y2 - _y1
        # Offsets of this cell only; x offsets scale with w, y offsets with h.
        _offset = offset[:,row,col]
        x1 = _x1 + float(_offset[0])*w
        y1 = _y1 + float(_offset[1])*h
        x2 = _x2 + float(_offset[2])*w
        y2 = _y2 + float(_offset[3])*h
        return [x1,y1,x2,y2,float(conf)]

    def nms(self,boxs,thresh=0.3,isMin=False):
        """Non-maximum suppression.

        Repeatedly keeps the most confident remaining box and discards every
        other box whose overlap with it is ``>= thresh``.

        Args:
            boxs: ``(N, 5)`` array, confidence in column 4 (or an empty array).
            thresh: overlap at or above which a box is suppressed.
            isMin: forwarded to :meth:`iou` (intersection over the smaller area).

        Returns:
            ``(K, 5)`` array of kept boxes, or an empty array for empty input.
        """
        if boxs.shape[0] == 0:
            return np.array([])
        # Sort by descending confidence.
        _boxs = boxs[(-boxs[:,4]).argsort()]
        bboxs = []
        while _boxs.shape[0] > 1:
            a_box = _boxs[0]
            b_boxs = _boxs[1:]
            bboxs.append(a_box)
            index = np.where(self.iou(a_box,b_boxs,isMin) < thresh)
            _boxs = b_boxs[index]
        if _boxs.shape[0] > 0:
            bboxs.append(_boxs[0])
        return np.stack(bboxs)

    def iou(self,box,boxs,isMin=False):
        """Overlap between one box and an array of boxes.

        Args:
            box: 1-D array ``[x1, y1, x2, y2, ...]``.
            boxs: ``(N, >=4)`` array of boxes in the same layout.
            isMin: when true, divide the intersection by the smaller of the
                two areas; otherwise by the union (standard IoU).

        Returns:
            1-D array with ``N`` overlap ratios.
        """
        box_area = (box[2] - box[0])*(box[3] - box[1])
        area = (boxs[:,2] - boxs[:,0])*(boxs[:,3] - boxs[:,1])
        x1 = np.maximum(box[0],boxs[:,0])
        y1 = np.maximum(box[1],boxs[:,1])
        x2 = np.minimum(box[2],boxs[:,2])
        y2 = np.minimum(box[3],boxs[:,3])
        # Clamp to zero so that disjoint boxes produce no intersection.
        w = np.maximum(0,x2 - x1)
        h = np.maximum(0,y2 - y1)
        inter = w * h
        if isMin:
            ovr = np.true_divide(inter,np.minimum(box_area,area))
        else:
            ovr = np.true_divide(inter,(box_area + area - inter))
        return ovr

    def convert_to_square(self,bbox):
        """Expand every box to a square with the same centre.

        The side of the square is the longer side of the original box.

        Args:
            bbox: ``(N, >=4)`` array of ``[x1, y1, x2, y2, ...]`` rows.

        Returns:
            A new array of the same shape holding the squared boxes (extra
            columns are copied unchanged), or an empty array for empty input.
        """
        if bbox.shape[0] == 0:
            return np.array([])
        square_bbox = bbox.copy()
        w = bbox[:,2] - bbox[:,0]
        h = bbox[:,3] - bbox[:,1]
        max_side = np.maximum(w,h)
        # Shift the top-left corner so that the centre of the box is preserved.
        square_bbox[:,0] = bbox[:,0] + w * 0.5 - max_side * 0.5
        square_bbox[:,1] = bbox[:,1] + h * 0.5 - max_side * 0.5
        square_bbox[:,2] = square_bbox[:,0] + max_side
        square_bbox[:,3] = square_bbox[:,1] + max_side
        return square_bbox

效果:

MTCNN完整流程_第1张图片

MTCNN完整流程_第2张图片

你可能感兴趣的:(人工智能,深度学习,计算机视觉)