mtcnn_basemodel
from collections import OrderedDict

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


class Flatten(nn.Module):
    """Flattens [batch_size, c, h, w] into [batch_size, c*h*w]."""

    def forward(self, x):
        # transpose before flattening so the element order matches
        # the layout the pretrained .npy weights expect
        x = x.transpose(3, 2).contiguous()
        return x.view(x.size(0), -1)
class PNet(nn.Module):
def __init__(self):
super(PNet, self).__init__()
self.features = nn.Sequential(OrderedDict([
('conv1', nn.Conv2d(3, 10, 3, 1)),
('prelu1', nn.PReLU(10)),
('pool1', nn.MaxPool2d(2, 2, ceil_mode=True)),
('conv2', nn.Conv2d(10, 16, 3, 1)),
('prelu2', nn.PReLU(16)),
('conv3', nn.Conv2d(16, 32, 3, 1)),
('prelu3', nn.PReLU(32))
]))
self.conv4_1 = nn.Conv2d(32, 2, 1, 1)
self.conv4_2 = nn.Conv2d(32, 4, 1, 1)
        weights = np.load('src/weights/pnet.npy', allow_pickle=True)[()]
for n, p in self.named_parameters():
p.data = torch.FloatTensor(weights[n])
def forward(self, x):
"""
Arguments:
x: a float tensor with shape [batch_size, 3, h, w].
Returns:
b: a float tensor with shape [batch_size, 4, h, w].
a: a float tensor with shape [batch_size, 2, h, w].
"""
x = self.features(x)
a = self.conv4_1(x)
b = self.conv4_2(x)
        a = F.softmax(a, dim=1)  # face / non-face probabilities
return b, a
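P-Net is fully convolutional, so it accepts inputs of any spatial size; a 12x12 input (its receptive field) collapses to a single 1x1 output cell. A minimal shape check, assuming the weight file is in place:

# hedged sanity check (assumes src/weights/pnet.npy is available)
pnet = PNet()
pnet.eval()
with torch.no_grad():
    b, a = pnet(torch.randn(1, 3, 12, 12))
print(b.shape, a.shape)  # torch.Size([1, 4, 1, 1]) torch.Size([1, 2, 1, 1])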
class RNet(nn.Module):
def __init__(self):
super(RNet, self).__init__()
self.features = nn.Sequential(OrderedDict([
('conv1', nn.Conv2d(3, 28, 3, 1)),
('prelu1', nn.PReLU(28)),
('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)),
('conv2', nn.Conv2d(28, 48, 3, 1)),
('prelu2', nn.PReLU(48)),
('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)),
('conv3', nn.Conv2d(48, 64, 2, 1)),
('prelu3', nn.PReLU(64)),
('flatten', Flatten()),
('conv4', nn.Linear(576, 128)),
('prelu4', nn.PReLU(128))
]))
self.conv5_1 = nn.Linear(128, 2)
self.conv5_2 = nn.Linear(128, 4)
        weights = np.load('src/weights/rnet.npy', allow_pickle=True)[()]
for n, p in self.named_parameters():
p.data = torch.FloatTensor(weights[n])
def forward(self, x):
"""
Arguments:
x: a float tensor with shape [batch_size, 3, h, w].
Returns:
b: a float tensor with shape [batch_size, 4].
a: a float tensor with shape [batch_size, 2].
"""
x = self.features(x)
a = self.conv5_1(x)
b = self.conv5_2(x)
        a = F.softmax(a, dim=1)  # face / non-face probabilities
return b, a
class ONet(nn.Module):
def __init__(self):
super(ONet, self).__init__()
self.features = nn.Sequential(OrderedDict([
('conv1', nn.Conv2d(3, 32, 3, 1)),
('prelu1', nn.PReLU(32)),
('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)),
('conv2', nn.Conv2d(32, 64, 3, 1)),
('prelu2', nn.PReLU(64)),
('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)),
('conv3', nn.Conv2d(64, 64, 3, 1)),
('prelu3', nn.PReLU(64)),
('pool3', nn.MaxPool2d(2, 2, ceil_mode=True)),
('conv4', nn.Conv2d(64, 128, 2, 1)),
('prelu4', nn.PReLU(128)),
('flatten', Flatten()),
('conv5', nn.Linear(1152, 256)),
('drop5', nn.Dropout(0.25)),
('prelu5', nn.PReLU(256)),
]))
self.conv6_1 = nn.Linear(256, 2)
self.conv6_2 = nn.Linear(256, 4)
self.conv6_3 = nn.Linear(256, 10)
        weights = np.load('src/weights/onet.npy', allow_pickle=True)[()]
for n, p in self.named_parameters():
p.data = torch.FloatTensor(weights[n])
def forward(self, x):
"""
Arguments:
x: a float tensor with shape [batch_size, 3, h, w].
Returns:
c: a float tensor with shape [batch_size, 10].
b: a float tensor with shape [batch_size, 4].
a: a float tensor with shape [batch_size, 2].
"""
x = self.features(x)
a = self.conv6_1(x)
b = self.conv6_2(x)
c = self.conv6_3(x)
        a = F.softmax(a, dim=1)  # face / non-face probabilities
return c, b, a
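Unlike P-Net, R-Net and O-Net end in fully connected layers, so their inputs must be exactly 24x24 and 48x48; those sizes are what make the flattened features 576 (= 64*3*3) and 1152 (= 128*3*3) wide. A quick check under the same assumption that the weight files exist:

rnet, onet = RNet(), ONet()
rnet.eval()
onet.eval()
with torch.no_grad():
    b, a = rnet(torch.randn(1, 3, 24, 24))      # b: [1, 4], a: [1, 2]
    c, b, a = onet(torch.randn(1, 3, 48, 48))   # c: [1, 10], b: [1, 4], a: [1, 2]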
try_mtcnn_step_by_step
%load_ext autoreload
%autoreload 2
import numpy as np
import torch
from PIL import Image
from src.get_nets import PNet, RNet, ONet
from src.box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
from src.first_stage import run_first_stage
from src.visualization_utils import show_bboxes
Load models
pnet = PNet()
rnet = RNet()
onet = ONet()
Set hyperparameters
# if this value is too low the algorithm will use a lot of memory
min_face_size = 15.0
# for probabilities
thresholds = [0.6, 0.7, 0.8]
# for NMS
nms_thresholds = [0.7, 0.7, 0.7]
Load an image
image = Image.open('images/office5.jpg')
image
Build an image pyramid
width, height = image.size
min_length = min(height, width)
min_detection_size = 12
factor = 0.707 # sqrt(0.5)
# scales for scaling the image
scales = []
# scale the image so that the smallest size we can detect
# (12 px, P-Net's receptive field) corresponds to the smallest
# face size we actually want to detect (min_face_size)
m = min_detection_size/min_face_size
min_length *= m
factor_count = 0
while min_length > min_detection_size:
scales.append(m*factor**factor_count)
min_length *= factor
factor_count += 1
print('scales:', ['{:.2f}'.format(s) for s in scales])
print('number of different scales:', len(scales))
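As a cross-check, the loop can be traced by hand for a hypothetical image:

# hand trace for a hypothetical 1024x768 image (shorter side 768):
# min_length starts at 768 * 12/15 = 614.4, then shrinks by 0.707 each step:
# 614.4, 434.4, 307.1, 217.1, 153.5, 108.5, 76.7, 54.3, 38.4, 27.1, 19.2, 13.6
# twelve values stay above 12, so twelve pyramid levels would be built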
P-Net
bounding_boxes = []
# run P-Net on different scales
for s in scales:
boxes = run_first_stage(image, pnet, scale=s, threshold=thresholds[0])
bounding_boxes.append(boxes)
# collect boxes (and offsets, and scores) from different scales
bounding_boxes = [i for i in bounding_boxes if i is not None]
bounding_boxes = np.vstack(bounding_boxes)
print('number of bounding boxes:', len(bounding_boxes))
show_bboxes(image, bounding_boxes)
NMS + calibration
keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
bounding_boxes = bounding_boxes[keep]
# use offsets predicted by pnet to transform bounding boxes
bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
# shape [n_boxes, 5]
bounding_boxes = convert_to_square(bounding_boxes)
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
print('number of bounding boxes:', len(bounding_boxes))
show_bboxes(image, bounding_boxes)
R-Net
img_boxes = get_image_boxes(bounding_boxes, image, size=24)
img_boxes = torch.FloatTensor(img_boxes)
with torch.no_grad():
    output = rnet(img_boxes)
offsets = output[0].data.numpy() # shape [n_boxes, 4]
probs = output[1].data.numpy() # shape [n_boxes, 2]
keep = np.where(probs[:, 1] > thresholds[1])[0]
bounding_boxes = bounding_boxes[keep]
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
offsets = offsets[keep]
print('number of bounding boxes:', len(bounding_boxes))
show_bboxes(image, bounding_boxes)
NMS + calibration
keep = nms(bounding_boxes, nms_thresholds[1])
bounding_boxes = bounding_boxes[keep]
bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
bounding_boxes = convert_to_square(bounding_boxes)
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
print('number of bounding boxes:', len(bounding_boxes))
show_bboxes(image, bounding_boxes)
O-Net
img_boxes = get_image_boxes(bounding_boxes, image, size=48)
img_boxes = torch.FloatTensor(img_boxes)
with torch.no_grad():
    output = onet(img_boxes)
landmarks = output[0].data.numpy() # shape [n_boxes, 10]
offsets = output[1].data.numpy() # shape [n_boxes, 4]
probs = output[2].data.numpy() # shape [n_boxes, 2]
keep = np.where(probs[:, 1] > thresholds[2])[0]
bounding_boxes = bounding_boxes[keep]
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
offsets = offsets[keep]
landmarks = landmarks[keep]
# compute landmark points
width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1)*landmarks[:, 0:5]
landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1)*landmarks[:, 5:10]
print('number of bounding boxes:', len(bounding_boxes))
show_bboxes(image, bounding_boxes, landmarks)
NMS + calibration
bounding_boxes = calibrate_box(bounding_boxes, offsets)
keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
bounding_boxes = bounding_boxes[keep]
landmarks = landmarks[keep]
print('number of bounding boxes:', len(bounding_boxes))
show_bboxes(image, bounding_boxes, landmarks)
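For reuse, the whole walkthrough can be folded into one helper. This is a minimal sketch built from the exact steps above; `detect_faces` is a name introduced here, it assumes `pnet`, `rnet`, `onet` and the `src.*` helpers imported earlier, and it does no empty-result handling:

def detect_faces(image, min_face_size=15.0,
                 thresholds=(0.6, 0.7, 0.8), nms_thresholds=(0.7, 0.7, 0.7)):
    # stage 0: build the image pyramid
    m = 12.0 / min_face_size
    min_length = min(image.size) * m
    scales = []
    while min_length > 12:
        scales.append(m * 0.707**len(scales))
        min_length *= 0.707

    # stage 1: P-Net on every scale, then NMS + calibration
    boxes = [run_first_stage(image, pnet, scale=s, threshold=thresholds[0]) for s in scales]
    boxes = np.vstack([b for b in boxes if b is not None])
    keep = nms(boxes[:, 0:5], nms_thresholds[0])
    boxes = convert_to_square(calibrate_box(boxes[keep, 0:5], boxes[keep, 5:]))
    boxes[:, 0:4] = np.round(boxes[:, 0:4])

    # stage 2: R-Net on 24x24 crops
    with torch.no_grad():
        offsets, probs = rnet(torch.FloatTensor(get_image_boxes(boxes, image, size=24)))
    offsets, probs = offsets.numpy(), probs.numpy()
    keep = np.where(probs[:, 1] > thresholds[1])[0]
    boxes, offsets = boxes[keep], offsets[keep]
    boxes[:, 4] = probs[keep, 1]
    keep = nms(boxes, nms_thresholds[1])
    boxes = convert_to_square(calibrate_box(boxes[keep], offsets[keep]))
    boxes[:, 0:4] = np.round(boxes[:, 0:4])

    # stage 3: O-Net on 48x48 crops, landmarks included
    with torch.no_grad():
        landmarks, offsets, probs = onet(torch.FloatTensor(get_image_boxes(boxes, image, size=48)))
    landmarks, offsets, probs = landmarks.numpy(), offsets.numpy(), probs.numpy()
    keep = np.where(probs[:, 1] > thresholds[2])[0]
    boxes, offsets, landmarks = boxes[keep], offsets[keep], landmarks[keep]
    boxes[:, 4] = probs[keep, 1]
    w = boxes[:, 2] - boxes[:, 0] + 1.0
    h = boxes[:, 3] - boxes[:, 1] + 1.0
    landmarks[:, 0:5] = boxes[:, 0:1] + w[:, None] * landmarks[:, 0:5]
    landmarks[:, 5:10] = boxes[:, 1:2] + h[:, None] * landmarks[:, 5:10]
    boxes = calibrate_box(boxes, offsets)
    keep = nms(boxes, nms_thresholds[2], mode='min')
    return boxes[keep], landmarks[keep]

boxes, landmarks = detect_faces(image)
show_bboxes(image, boxes, landmarks)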
Face-swap training
import os
import random
import argparse

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.utils.data import DataLoader
from torchvision.utils import save_image
class autoencoder(nn.Module):
    def __init__(self, emdsize=10):
super(autoencoder, self).__init__()
self.emdsize = emdsize
self.encoder = nn.Sequential(
nn.Conv2d(1, 64, 3, stride=2, padding=1),
nn.ReLU(True),
nn.MaxPool2d(2, stride=2),
nn.Conv2d(64, emdsize, 3, stride=2, padding=1),
nn.ReLU(True),
nn.MaxPool2d(2, stride=2),
)
self.decoder_src = nn.Sequential(
nn.ConvTranspose2d(emdsize, 64, 2, stride=2),
nn.ReLU(True),
nn.ConvTranspose2d(64, 64, 3, stride=2, padding=1),
nn.ReLU(True),
nn.ConvTranspose2d(64, 64, 3, stride=2),
nn.ReLU(True),
nn.ConvTranspose2d(64, 1, 2, stride=2, padding=1)
)
        # same architecture as decoder_src, but separately trained parameters
self.decoder_dst = nn.Sequential(
nn.ConvTranspose2d(emdsize, 64, 2, stride=2),
nn.ReLU(True),
nn.ConvTranspose2d(64, 64, 3, stride=2, padding=1),
nn.ReLU(True),
nn.ConvTranspose2d(64, 64, 3, stride=2),
nn.ReLU(True),
nn.ConvTranspose2d(64, 1, 2, stride=2, padding=1)
)
def forward(self, x, y):
encode_src = self.encoder(x)
encode_dst = self.encoder(y)
decode_src = self.decoder_src(encode_src)
decode_dst = self.decoder_dst(encode_dst)
        return encode_src, encode_dst, decode_src, decode_dst
class Discriminator(nn.Module):
def __init__(self):
super(Discriminator, self).__init__()
self.main = nn.Sequential(
nn.Conv2d(1, 64, 3, stride=2, padding=1),
nn.ReLU(True),
nn.MaxPool2d(2, stride=2),
nn.Conv2d(64, 64, 3, stride=2, padding=1),
nn.ReLU(True),
nn.MaxPool2d(2, stride=2),
nn.Conv2d(64, 1, 2),
nn.Sigmoid()
)
def forward(self, input):
return self.main(input)
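These layer stacks only close up for 1-channel 28x28 inputs: the encoder maps 1x28x28 down to emdsize x 2 x 2, each decoder maps that back to 1x28x28, and the discriminator's final 2x2 convolution then lands exactly on a 1x1 score map. A quick hand-verified check on a hypothetical dummy batch:

# hedged shape check: the architectures only line up for 1x28x28 inputs
m_check, d_check = autoencoder(emdsize=10), Discriminator()
x = torch.randn(2, 1, 28, 28)
_, _, dec, _ = m_check(x, x)
print(dec.shape)         # torch.Size([2, 1, 28, 28])
print(d_check(x).shape)  # torch.Size([2, 1, 1, 1])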
batch_size = 128
# placeholder paths: src and dst should point at two different identities;
# the nets above take a single channel and only fit 28x28 inputs,
# hence the Grayscale + 28x28 transforms
dataroot_src = 'data/faces_src'
dataroot_dst = 'data/faces_dst'
image_size = 28
face_transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])
train_dataset_src = dset.ImageFolder(root=dataroot_src, transform=face_transform)
train_dataset_dst = dset.ImageFolder(root=dataroot_dst, transform=face_transform)
# drop_last keeps every batch at batch_size, matching the fixed label tensors below
train_loader_src = DataLoader(train_dataset_src, shuffle=True, batch_size=batch_size, drop_last=True)
train_loader_dst = DataLoader(train_dataset_dst, shuffle=True, batch_size=batch_size, drop_last=True)
model = autoencoder(emdsize=10)
D_src = Discriminator()
D_dst = Discriminator()
lr, beta1, num_epochs = 0.0002, 0.5, 5  # placeholder hyperparameters
optimizerD_src = optim.Adam(D_src.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerD_dst = optim.Adam(D_dst.parameters(), lr=lr, betas=(beta1, 0.999))
# each "generator" optimizer updates the shared encoder plus one decoder
optimizerG_src = optim.Adam(list(model.encoder.parameters()) + list(model.decoder_src.parameters()), lr=lr, betas=(beta1, 0.999))
optimizerG_dst = optim.Adam(list(model.encoder.parameters()) + list(model.decoder_dst.parameters()), lr=lr, betas=(beta1, 0.999))
reallabel = torch.ones(batch_size)   # matches D's output after .view(-1)
fakelabel = torch.zeros(batch_size)
criterion = nn.BCELoss()
for epoch in range(num_epochs):
    # DataLoaders are iterated, not indexed; zip pairs up src and dst batches
    for (src, _), (dst, _) in zip(train_loader_src, train_loader_dst):
        encode_src, encode_dst, decode_src, decode_dst = model(src, dst)

        # update the discriminators; the fakes are detached so no
        # generator gradient flows through this step
        optimizerD_src.zero_grad()
        optimizerD_dst.zero_grad()
        real_src = D_src(src).view(-1)
        real_dst = D_dst(dst).view(-1)
        fake_src = D_src(decode_src.detach()).view(-1)
        fake_dst = D_dst(decode_dst.detach()).view(-1)
        loss_D_src = criterion(real_src, reallabel) + criterion(fake_src, fakelabel)
        loss_D_dst = criterion(real_dst, reallabel) + criterion(fake_dst, fakelabel)
        loss_D_src.backward()
        loss_D_dst.backward()
        optimizerD_src.step()
        optimizerD_dst.step()

        # update the autoencoder so each decoder fools its discriminator
        # (face-swap setups usually add a reconstruction loss here as well)
        optimizerG_src.zero_grad()
        optimizerG_dst.zero_grad()
        loss_G_src = criterion(D_src(decode_src).view(-1), reallabel)
        loss_G_dst = criterion(D_dst(decode_dst).view(-1), reallabel)
        (loss_G_src + loss_G_dst).backward()
        optimizerG_src.step()
        optimizerG_dst.step()
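Once training has converged, the actual face swap is the cross-pairing the two decoders were trained for: encode a src face with the shared encoder, then decode it with the dst decoder. A minimal sketch, reusing the src loader as a stand-in for new faces and a placeholder output path:

model.eval()
with torch.no_grad():
    src_batch, _ = next(iter(train_loader_src))            # stand-in input
    swapped = model.decoder_dst(model.encoder(src_batch))  # src identity -> dst face
save_image(swapped, 'swapped_faces.png', normalize=True)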