人脸关键点检测,是输入一张人脸图片,模型会返回人脸关键点的一系列坐标,从而定位到人脸的关键信息。
In [1]
# 环境导入
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
import paddle
paddle.set_device('gpu') # 设置为GPU
import warnings
warnings.filterwarnings('ignore') # 忽略 warning
本次实验所采用的数据集来源为github的开源项目
目前该数据集已上传到 AI Studio 人脸关键点识别,加载后可以直接使用下面的命令解压。
In [2]
# !unzip data/data69065/data.zip
解压后的数据集结构为
data/
|—— test
| |—— Abdel_Aziz_Al-Hakim_00.jpg
... ...
|—— test_frames_keypoints.csv
|—— training
| |—— Abdullah_Gul_10.jpg
... ...
|—— training_frames_keypoints.csv
其中,training
和 test
文件夹分别存放训练集和测试集。training_frames_keypoints.csv
和 test_frames_keypoints.csv
存放着训练集和测试集的标签。接下来,我们先来观察一下 training_frames_keypoints.csv
文件,看一下训练集的标签是如何定义的。
In [3]
key_pts_frame = pd.read_csv('data/training_frames_keypoints.csv') # 读取数据集
print('Number of images: ', key_pts_frame.shape[0]) # 输出数据集大小
key_pts_frame.head(5) # 看前五条数据
Number of images: 3462
Unnamed: 0 0 1 2 3 4 5 6 \ 0 Luis_Fonsi_21.jpg 45.0 98.0 47.0 106.0 49.0 110.0 53.0 1 Lincoln_Chafee_52.jpg 41.0 83.0 43.0 91.0 45.0 100.0 47.0 2 Valerie_Harper_30.jpg 56.0 69.0 56.0 77.0 56.0 86.0 56.0 3 Angelo_Reyes_22.jpg 61.0 80.0 58.0 95.0 58.0 108.0 58.0 4 Kristen_Breitweiser_11.jpg 58.0 94.0 58.0 104.0 60.0 113.0 62.0 7 8 ... 126 127 128 129 130 131 132 133 \ 0 119.0 56.0 ... 83.0 119.0 90.0 117.0 83.0 119.0 81.0 122.0 1 108.0 51.0 ... 85.0 122.0 94.0 120.0 85.0 122.0 83.0 122.0 2 94.0 58.0 ... 79.0 105.0 86.0 108.0 77.0 105.0 75.0 105.0 3 120.0 58.0 ... 98.0 136.0 107.0 139.0 95.0 139.0 91.0 139.0 4 121.0 67.0 ... 92.0 117.0 103.0 118.0 92.0 120.0 88.0 122.0 134 135 0 77.0 122.0 1 79.0 122.0 2 73.0 105.0 3 85.0 136.0 4 84.0 122.0 [5 rows x 137 columns]
上表中每一行都代表一条数据,其中,第一列是图片的文件名,之后从第0列到第135列,就是该图的关键点信息。因为每个关键点可以用两个坐标表示,所以 136/2 = 68,就可以看出这个数据集为68点人脸关键点数据集。
Tips1: 目前常用的人脸关键点标注,有如下点数的标注
Tips2:本次所采用的68标注,标注顺序如下:
In [68]
# 计算标签的均值和标准差,用于标签的归一化
key_pts_values = key_pts_frame.values[:,1:] # 取出标签信息
data_mean = key_pts_values.mean() # 计算均值
data_std = key_pts_values.std() # 计算标准差
print('标签的均值为:', data_mean)
print('标签的标准差为:', data_std)
标签的均值为: 104.4724870017331 标签的标准差为: 43.17302271754281
In [69]
def show_keypoints(image, key_pts):
"""
Args:
image: 图像信息
key_pts: 关键点信息,
展示图片和关键点信息
"""
plt.imshow(image.astype('uint8')) # 展示图片信息
for i in range(len(key_pts)//2,):
plt.scatter(key_pts[i*2], key_pts[i*2+1], s=20, marker='.', c='b') # 展示关键点信息
In [70]
# 展示单条数据
n = 14 # n为数据在表格中的索引
image_name = key_pts_frame.iloc[n, 0] # 获取图像名称
key_pts = key_pts_frame.iloc[n, 1:].as_matrix() # 将图像label格式转为numpy.array的格式
key_pts = key_pts.astype('float').reshape(-1) # 获取图像关键点信息
print(key_pts.shape)
plt.figure(figsize=(5, 5)) # 展示的图像大小
show_keypoints(mpimg.imread(os.path.join('data/training/', image_name)), key_pts) # 展示图像与关键点信息
plt.show() # 展示图像
(136,)
使用飞桨框架高层API的 paddle.io.Dataset
自定义数据集类,具体可以参考官网文档 自定义数据集。
按照 __init__
中的定义,实现 __getitem__
和 __len__
.
In [71]
# 按照Dataset的使用规范,构建人脸关键点数据集
from paddle.io import Dataset
class FacialKeypointsDataset(Dataset):
# 人脸关键点数据集
"""
步骤一:继承paddle.io.Dataset类
"""
def __init__(self, csv_file, root_dir, transform=None):
"""
步骤二:实现构造函数,定义数据集大小
Args:
csv_file (string): 带标注的csv文件路径
root_dir (string): 图片存储的文件夹路径
transform (callable, optional): 应用于图像上的数据处理方法
"""
self.key_pts_frame = pd.read_csv(csv_file) # 读取csv文件
self.root_dir = root_dir # 获取图片文件夹路径
self.transform = transform # 获取 transform 方法
def __getitem__(self, idx):
"""
步骤三:实现__getitem__方法,定义指定index时如何获取数据,并返回单条数据(训练数据,对应的标签)
"""
# 实现 __getitem__
return image, key_pts
def __len__(self):
"""
步骤四:实现__len__方法,返回数据集总数目
"""
# 实现 __len__
实例化数据集并显示一些图像。
In [72]
# 构建一个数据集类
face_dataset = FacialKeypointsDataset(csv_file='data/training_frames_keypoints.csv',
root_dir='data/training/')
# 输出数据集大小
print('数据集大小为: ', len(face_dataset))
# 根据 face_dataset 可视化数据集
num_to_display = 3
for i in range(num_to_display):
# 定义图片大小
fig = plt.figure(figsize=(20,10))
# 随机选择图片
rand_i = np.random.randint(0, len(face_dataset))
sample = face_dataset[rand_i]
# 输出图片大小和关键点的数量
print(i, sample[0].shape, sample[1].shape)
# 设置图片打印信息
ax = plt.subplot(1, num_to_display, i + 1)
ax.set_title('Sample #{}'.format(i))
# 输出图片
show_keypoints(sample[0], sample[1])
数据集大小为: 3462 0 (256, 216, 3) (136,) 1 (217, 227, 3) (136,) 2 (171, 180, 3) (136,)
上述代码虽然完成了数据集的定义,但是还有一些问题,如:
这些问题都会影响模型最终的性能,所以需要对数据进行预处理。
对图像进行预处理,包括灰度化、归一化、重新设置尺寸、随机裁剪,修改通道格式等等,以满足数据要求;每一类的功能如下:
实现数据预处理方法 ToCHW
In [73]
# 标准化自定义 transform 方法
class TransformAPI(object):
"""
步骤一:继承 object 类
"""
def __call__(self, data):
"""
步骤二:在 __call__ 中定义数据处理方法
"""
processed_data = data
return processed_data
In [74]
import paddle.vision.transforms.functional as F
class GrayNormalize(object):
# 将图片变为灰度图,并将其值放缩到[0, 1]
# 将 label 放缩到 [-1, 1] 之间
def __call__(self, data):
image = data[0] # 获取图片
key_pts = data[1] # 获取标签
image_copy = np.copy(image)
key_pts_copy = np.copy(key_pts)
# 灰度化图片
gray_scale = paddle.vision.transforms.Grayscale(num_output_channels=3)
image_copy = gray_scale(image_copy)
# 将图片值放缩到 [0, 1]
image_copy = image_copy / 255.0
# 将坐标点放缩到 [-1, 1]
mean = data_mean # 获取标签均值
std = data_std # 获取标签标准差
key_pts_copy = (key_pts_copy - mean)/std
return image_copy, key_pts_copy
class Resize(object):
# 将输入图像调整为指定大小
def __init__(self, output_size):
assert isinstance(output_size, (int, tuple))
self.output_size = output_size
def __call__(self, data):
image = data[0] # 获取图片
key_pts = data[1] # 获取标签
image_copy = np.copy(image)
key_pts_copy = np.copy(key_pts)
h, w = image_copy.shape[:2]
if isinstance(self.output_size, int):
if h > w:
new_h, new_w = self.output_size * h / w, self.output_size
else:
new_h, new_w = self.output_size, self.output_size * w / h
else:
new_h, new_w = self.output_size
new_h, new_w = int(new_h), int(new_w)
img = F.resize(image_copy, (new_h, new_w))
# scale the pts, too
key_pts_copy[::2] = key_pts_copy[::2] * new_w / w
key_pts_copy[1::2] = key_pts_copy[1::2] * new_h / h
return img, key_pts_copy
class RandomCrop(object):
# 随机位置裁剪输入的图像
def __init__(self, output_size):
assert isinstance(output_size, (int, tuple))
if isinstance(output_size, int):
self.output_size = (output_size, output_size)
else:
assert len(output_size) == 2
self.output_size = output_size
def __call__(self, data):
image = data[0]
key_pts = data[1]
image_copy = np.copy(image)
key_pts_copy = np.copy(key_pts)
h, w = image_copy.shape[:2]
new_h, new_w = self.output_size
top = np.random.randint(0, h - new_h)
left = np.random.randint(0, w - new_w)
image_copy = image_copy[top: top + new_h,
left: left + new_w]
key_pts_copy[::2] = key_pts_copy[::2] - left
key_pts_copy[1::2] = key_pts_copy[1::2] - top
return image_copy, key_pts_copy
class ToCHW(object):
# 将图像的格式由HWC改为CHW
def __call__(self, data):
# 实现ToCHW,可以使用 paddle.vision.transforms.Transpose 实现
return image, key_pts
看一下每种图像预处理方法的的效果。
In [75]
import paddle.vision.transforms as T
# 测试 Resize
resize = Resize(256)
# 测试 RandomCrop
random_crop = RandomCrop(128)
# 测试 GrayNormalize
norm = GrayNormalize()
# 测试 Resize + RandomCrop,图像大小变到250*250, 然后截取出224*224的图像块
composed = paddle.vision.transforms.Compose([Resize(250), RandomCrop(224)])
test_num = 800 # 测试的数据下标
data = face_dataset[test_num]
transforms = {'None': None,
'norm': norm,
'random_crop': random_crop,
'resize': resize ,
'composed': composed}
for i, func_name in enumerate(['None', 'norm', 'random_crop', 'resize', 'composed']):
# 定义图片大小
fig = plt.figure(figsize=(20,10))
# 处理图片
if transforms[func_name] != None:
transformed_sample = transforms[func_name](data)
else:
transformed_sample = data
# 设置图片打印信息
ax = plt.subplot(1, 5, i + 1)
ax.set_title(' Transform is #{}'.format(func_name))
# 输出图片
show_keypoints(transformed_sample[0], transformed_sample[1])
让我们将 Resize、RandomCrop、GrayNormalize、ToCHW
应用于新的数据集
In [76]
from paddle.vision.transforms import Compose
data_transform = Compose([Resize(256), RandomCrop(224), GrayNormalize(), ToCHW()])
# create the transformed dataset
train_dataset = FacialKeypointsDataset(csv_file='data/training_frames_keypoints.csv',
root_dir='data/training/',
transform=data_transform)
print('Number of train dataset images: ', len(train_dataset))
for i in range(4):
sample = train_dataset[i]
print(i, sample[0].shape, sample[1].shape)
test_dataset = FacialKeypointsDataset(csv_file='data/test_frames_keypoints.csv',
root_dir='data/test/',
transform=data_transform)
print('Number of test dataset images: ', len(test_dataset))
Number of train dataset images: 3462 0 (3, 224, 224) (136,) 1 (3, 224, 224) (136,) 2 (3, 224, 224) (136,) 3 (3, 224, 224) (136,) Number of test dataset images: 770
根据前文的分析可知,人脸关键点检测和分类,可以使用同样的网络结构,如LeNet、Resnet50等完成特征的提取,只是在原来的基础上,需要修改模型的最后部分,将输出调整为 人脸关键点的数量*2,即每个人脸关键点的横坐标与纵坐标,就可以完成人脸关键点检测任务了,具体可以见下面的代码,也可以参考官网案例:人脸关键点检测
网络结构如下:
In [77]
import paddle.nn as nn
from paddle.vision.models import resnet50
class SimpleNet(nn.Layer):
def __init__(self, key_pts):
super(SimpleNet, self).__init__()
# 实现 __init__
def forward(self, x):
# 实现 forward
return x
使用model.summary
可视化网络结构。
In [78]
model = paddle.Model(SimpleNet(key_pts=68))
model.summary((-1, 3, 224, 224))
------------------------------------------------------------------------------- Layer (type) Input Shape Output Shape Param # =============================================================================== Conv2D-213 [[1, 3, 224, 224]] [1, 64, 112, 112] 9,408 BatchNorm2D-213 [[1, 64, 112, 112]] [1, 64, 112, 112] 256 ReLU-73 [[1, 64, 112, 112]] [1, 64, 112, 112] 0 MaxPool2D-5 [[1, 64, 112, 112]] [1, 64, 56, 56] 0 Conv2D-215 [[1, 64, 56, 56]] [1, 64, 56, 56] 4,096 BatchNorm2D-215 [[1, 64, 56, 56]] [1, 64, 56, 56] 256 ReLU-74 [[1, 256, 56, 56]] [1, 256, 56, 56] 0 Conv2D-216 [[1, 64, 56, 56]] [1, 64, 56, 56] 36,864 BatchNorm2D-216 [[1, 64, 56, 56]] [1, 64, 56, 56] 256 Conv2D-217 [[1, 64, 56, 56]] [1, 256, 56, 56] 16,384 BatchNorm2D-217 [[1, 256, 56, 56]] [1, 256, 56, 56] 1,024 Conv2D-214 [[1, 64, 56, 56]] [1, 256, 56, 56] 16,384 BatchNorm2D-214 [[1, 256, 56, 56]] [1, 256, 56, 56] 1,024 BottleneckBlock-65 [[1, 64, 56, 56]] [1, 256, 56, 56] 0 Conv2D-218 [[1, 256, 56, 56]] [1, 64, 56, 56] 16,384 BatchNorm2D-218 [[1, 64, 56, 56]] [1, 64, 56, 56] 256 ReLU-75 [[1, 256, 56, 56]] [1, 256, 56, 56] 0 Conv2D-219 [[1, 64, 56, 56]] [1, 64, 56, 56] 36,864 BatchNorm2D-219 [[1, 64, 56, 56]] [1, 64, 56, 56] 256 Conv2D-220 [[1, 64, 56, 56]] [1, 256, 56, 56] 16,384 BatchNorm2D-220 [[1, 256, 56, 56]] [1, 256, 56, 56] 1,024 BottleneckBlock-66 [[1, 256, 56, 56]] [1, 256, 56, 56] 0 Conv2D-221 [[1, 256, 56, 56]] [1, 64, 56, 56] 16,384 BatchNorm2D-221 [[1, 64, 56, 56]] [1, 64, 56, 56] 256 ReLU-76 [[1, 256, 56, 56]] [1, 256, 56, 56] 0 Conv2D-222 [[1, 64, 56, 56]] [1, 64, 56, 56] 36,864 BatchNorm2D-222 [[1, 64, 56, 56]] [1, 64, 56, 56] 256 Conv2D-223 [[1, 64, 56, 56]] [1, 256, 56, 56] 16,384 BatchNorm2D-223 [[1, 256, 56, 56]] [1, 256, 56, 56] 1,024 BottleneckBlock-67 [[1, 256, 56, 56]] [1, 256, 56, 56] 0 Conv2D-225 [[1, 256, 56, 56]] [1, 128, 56, 56] 32,768 BatchNorm2D-225 [[1, 128, 56, 56]] [1, 128, 56, 56] 512 ReLU-77 [[1, 512, 28, 28]] [1, 512, 28, 28] 0 Conv2D-226 [[1, 128, 56, 56]] [1, 128, 28, 28] 147,456 BatchNorm2D-226 [[1, 128, 28, 28]] [1, 128, 28, 28] 512 Conv2D-227 [[1, 128, 28, 28]] [1, 512, 28, 28] 65,536 BatchNorm2D-227 [[1, 512, 28, 28]] [1, 512, 28, 28] 2,048 Conv2D-224 [[1, 256, 56, 56]] [1, 512, 28, 28] 131,072 BatchNorm2D-224 [[1, 512, 28, 28]] [1, 512, 28, 28] 2,048 BottleneckBlock-68 [[1, 256, 56, 56]] [1, 512, 28, 28] 0 Conv2D-228 [[1, 512, 28, 28]] [1, 128, 28, 28] 65,536 BatchNorm2D-228 [[1, 128, 28, 28]] [1, 128, 28, 28] 512 ReLU-78 [[1, 512, 28, 28]] [1, 512, 28, 28] 0 Conv2D-229 [[1, 128, 28, 28]] [1, 128, 28, 28] 147,456 BatchNorm2D-229 [[1, 128, 28, 28]] [1, 128, 28, 28] 512 Conv2D-230 [[1, 128, 28, 28]] [1, 512, 28, 28] 65,536 BatchNorm2D-230 [[1, 512, 28, 28]] [1, 512, 28, 28] 2,048 BottleneckBlock-69 [[1, 512, 28, 28]] [1, 512, 28, 28] 0 Conv2D-231 [[1, 512, 28, 28]] [1, 128, 28, 28] 65,536 BatchNorm2D-231 [[1, 128, 28, 28]] [1, 128, 28, 28] 512 ReLU-79 [[1, 512, 28, 28]] [1, 512, 28, 28] 0 Conv2D-232 [[1, 128, 28, 28]] [1, 128, 28, 28] 147,456 BatchNorm2D-232 [[1, 128, 28, 28]] [1, 128, 28, 28] 512 Conv2D-233 [[1, 128, 28, 28]] [1, 512, 28, 28] 65,536 BatchNorm2D-233 [[1, 512, 28, 28]] [1, 512, 28, 28] 2,048 BottleneckBlock-70 [[1, 512, 28, 28]] [1, 512, 28, 28] 0 Conv2D-234 [[1, 512, 28, 28]] [1, 128, 28, 28] 65,536 BatchNorm2D-234 [[1, 128, 28, 28]] [1, 128, 28, 28] 512 ReLU-80 [[1, 512, 28, 28]] [1, 512, 28, 28] 0 Conv2D-235 [[1, 128, 28, 28]] [1, 128, 28, 28] 147,456 BatchNorm2D-235 [[1, 128, 28, 28]] [1, 128, 28, 28] 512 Conv2D-236 [[1, 128, 28, 28]] [1, 512, 28, 28] 65,536 BatchNorm2D-236 [[1, 512, 28, 28]] [1, 512, 28, 28] 2,048 BottleneckBlock-71 [[1, 512, 28, 28]] [1, 512, 28, 28] 0 Conv2D-238 [[1, 512, 28, 28]] [1, 256, 28, 28] 131,072 BatchNorm2D-238 [[1, 256, 28, 28]] [1, 256, 28, 28] 1,024 ReLU-81 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 0 Conv2D-239 [[1, 256, 28, 28]] [1, 256, 14, 14] 589,824 BatchNorm2D-239 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024 Conv2D-240 [[1, 256, 14, 14]] [1, 1024, 14, 14] 262,144 BatchNorm2D-240 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 4,096 Conv2D-237 [[1, 512, 28, 28]] [1, 1024, 14, 14] 524,288 BatchNorm2D-237 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 4,096 BottleneckBlock-72 [[1, 512, 28, 28]] [1, 1024, 14, 14] 0 Conv2D-241 [[1, 1024, 14, 14]] [1, 256, 14, 14] 262,144 BatchNorm2D-241 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024 ReLU-82 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 0 Conv2D-242 [[1, 256, 14, 14]] [1, 256, 14, 14] 589,824 BatchNorm2D-242 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024 Conv2D-243 [[1, 256, 14, 14]] [1, 1024, 14, 14] 262,144 BatchNorm2D-243 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 4,096 BottleneckBlock-73 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 0 Conv2D-244 [[1, 1024, 14, 14]] [1, 256, 14, 14] 262,144 BatchNorm2D-244 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024 ReLU-83 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 0 Conv2D-245 [[1, 256, 14, 14]] [1, 256, 14, 14] 589,824 BatchNorm2D-245 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024 Conv2D-246 [[1, 256, 14, 14]] [1, 1024, 14, 14] 262,144 BatchNorm2D-246 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 4,096 BottleneckBlock-74 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 0 Conv2D-247 [[1, 1024, 14, 14]] [1, 256, 14, 14] 262,144 BatchNorm2D-247 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024 ReLU-84 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 0 Conv2D-248 [[1, 256, 14, 14]] [1, 256, 14, 14] 589,824 BatchNorm2D-248 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024 Conv2D-249 [[1, 256, 14, 14]] [1, 1024, 14, 14] 262,144 BatchNorm2D-249 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 4,096 BottleneckBlock-75 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 0 Conv2D-250 [[1, 1024, 14, 14]] [1, 256, 14, 14] 262,144 BatchNorm2D-250 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024 ReLU-85 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 0 Conv2D-251 [[1, 256, 14, 14]] [1, 256, 14, 14] 589,824 BatchNorm2D-251 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024 Conv2D-252 [[1, 256, 14, 14]] [1, 1024, 14, 14] 262,144 BatchNorm2D-252 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 4,096 BottleneckBlock-76 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 0 Conv2D-253 [[1, 1024, 14, 14]] [1, 256, 14, 14] 262,144 BatchNorm2D-253 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024 ReLU-86 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 0 Conv2D-254 [[1, 256, 14, 14]] [1, 256, 14, 14] 589,824 BatchNorm2D-254 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024 Conv2D-255 [[1, 256, 14, 14]] [1, 1024, 14, 14] 262,144 BatchNorm2D-255 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 4,096 BottleneckBlock-77 [[1, 1024, 14, 14]] [1, 1024, 14, 14] 0 Conv2D-257 [[1, 1024, 14, 14]] [1, 512, 14, 14] 524,288 BatchNorm2D-257 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,048 ReLU-87 [[1, 2048, 7, 7]] [1, 2048, 7, 7] 0 Conv2D-258 [[1, 512, 14, 14]] [1, 512, 7, 7] 2,359,296 BatchNorm2D-258 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048 Conv2D-259 [[1, 512, 7, 7]] [1, 2048, 7, 7] 1,048,576 BatchNorm2D-259 [[1, 2048, 7, 7]] [1, 2048, 7, 7] 8,192 Conv2D-256 [[1, 1024, 14, 14]] [1, 2048, 7, 7] 2,097,152 BatchNorm2D-256 [[1, 2048, 7, 7]] [1, 2048, 7, 7] 8,192 BottleneckBlock-78 [[1, 1024, 14, 14]] [1, 2048, 7, 7] 0 Conv2D-260 [[1, 2048, 7, 7]] [1, 512, 7, 7] 1,048,576 BatchNorm2D-260 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048 ReLU-88 [[1, 2048, 7, 7]] [1, 2048, 7, 7] 0 Conv2D-261 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,359,296 BatchNorm2D-261 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048 Conv2D-262 [[1, 512, 7, 7]] [1, 2048, 7, 7] 1,048,576 BatchNorm2D-262 [[1, 2048, 7, 7]] [1, 2048, 7, 7] 8,192 BottleneckBlock-79 [[1, 2048, 7, 7]] [1, 2048, 7, 7] 0 Conv2D-263 [[1, 2048, 7, 7]] [1, 512, 7, 7] 1,048,576 BatchNorm2D-263 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048 ReLU-89 [[1, 2048, 7, 7]] [1, 2048, 7, 7] 0 Conv2D-264 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,359,296 BatchNorm2D-264 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048 Conv2D-265 [[1, 512, 7, 7]] [1, 2048, 7, 7] 1,048,576 BatchNorm2D-265 [[1, 2048, 7, 7]] [1, 2048, 7, 7] 8,192 BottleneckBlock-80 [[1, 2048, 7, 7]] [1, 2048, 7, 7] 0 AdaptiveAvgPool2D-5 [[1, 2048, 7, 7]] [1, 2048, 1, 1] 0 Linear-13 [[1, 2048]] [1, 1000] 2,049,000 ResNet-5 [[1, 3, 224, 224]] [1, 1000] 0 Linear-14 [[1, 1000]] [1, 512] 512,512 ReLU-90 [[1, 512]] [1, 512] 0 Linear-15 [[1, 512]] [1, 136] 69,768 =============================================================================== Total params: 26,192,432 Trainable params: 26,086,192 Non-trainable params: 106,240 ------------------------------------------------------------------------------- Input size (MB): 0.57 Forward/backward pass size (MB): 261.50 Params size (MB): 99.92 Estimated Total Size (MB): 361.99 -------------------------------------------------------------------------------
{'total_params': 26192432, 'trainable_params': 26086192}
训练模型前,需要设置训练模型所需的优化器,损失函数和评估指标。
特定任务的 Metric 计算方式在框架既有的 Metric接口中不存在,或算法不符合自己的需求,那么需要我们自己来进行Metric的自定义。这里介绍如何进行Metric的自定义操作,更多信息可以参考官网文档自定义Metric;首先来看下面的代码。
In [81]
from paddle.metric import Metric
class NME(Metric):
"""
1. 继承paddle.metric.Metric
"""
def __init__(self, name='nme', *args, **kwargs):
"""
2. 构造函数实现,自定义参数即可
"""
super(NME, self).__init__(*args, **kwargs)
self._name = name
self.rmse = 0
self.sample_num = 0
def name(self):
"""
3. 实现name方法,返回定义的评估指标名字
"""
return self._name
def update(self, preds, labels):
"""
4. 实现update方法,用于单个batch训练时进行评估指标计算。
- 当`compute`类函数未实现时,会将模型的计算输出和标签数据的展平作为`update`的参数传入。
"""
N = preds.shape[0]
preds = preds.reshape((N, -1, 2))
labels = labels.reshape((N, -1, 2))
self.rmse = 0
for i in range(N):
pts_pred, pts_gt = preds[i, ], labels[i, ]
interocular = np.linalg.norm(pts_gt[36, ] - pts_gt[45, ])
self.rmse += np.sum(np.linalg.norm(pts_pred - pts_gt, axis=1)) / (interocular * preds.shape[1])
self.sample_num += 1
return self.rmse / N
def accumulate(self):
"""
5. 实现accumulate方法,返回历史batch训练积累后计算得到的评价指标值。
每次`update`调用时进行数据积累,`accumulate`计算时对积累的所有数据进行计算并返回。
结算结果会在`fit`接口的训练日志中呈现。
"""
return self.rmse / self.sample_num
def reset(self):
"""
6. 实现reset方法,每个Epoch结束后进行评估指标的重置,这样下个Epoch可以重新进行计算。
"""
self.rmse = 0
self.sample_num = 0
In [82]
# 使用 paddle.Model 封装模型
# 定义Adam优化器
# 定义SmoothL1Loss
# 使用自定义metrics
# 配置模型
# 模型训练
损失函数的选择:L1Loss、L2Loss、SmoothL1Loss的对比
In [83]
The loss value printed in the log is the current step, and the metric is the average value of previous step. Epoch 1/50 step 55/55 [==============================] - loss: 0.1023 - nme: 6.9530e-04 - 519ms/step Epoch 2/50 step 55/55 [==============================] - loss: 0.0777 - nme: 6.4494e-04 - 523ms/step Epoch 3/50 step 55/55 [==============================] - loss: 0.0566 - nme: 5.6187e-04 - 526ms/step Epoch 4/50 step 55/55 [==============================] - loss: 0.0311 - nme: 3.2200e-04 - 522ms/step Epoch 5/50 step 55/55 [==============================] - loss: 0.0464 - nme: 4.2836e-04 - 522ms/step Epoch 6/50 step 55/55 [==============================] - loss: 0.0280 - nme: 3.6241e-04 - 520ms/step Epoch 7/50 step 55/55 [==============================] - loss: 0.0722 - nme: 5.5169e-04 - 524ms/step Epoch 8/50 step 55/55 [==============================] - loss: 0.0458 - nme: 4.0415e-04 - 522ms/step Epoch 9/50 step 55/55 [==============================] - loss: 0.0540 - nme: 4.1228e-04 - 524ms/step Epoch 10/50 step 55/55 [==============================] - loss: 0.0201 - nme: 3.1852e-04 - 520ms/step Epoch 11/50 step 55/55 [==============================] - loss: 0.0429 - nme: 4.4269e-04 - 525ms/step Epoch 12/50 step 55/55 [==============================] - loss: 0.1675 - nme: 0.0010 - 520ms/step Epoch 13/50 step 55/55 [==============================] - loss: 0.0201 - nme: 2.9785e-04 - 524ms/step Epoch 14/50 step 55/55 [==============================] - loss: 0.0345 - nme: 4.2686e-04 - 517ms/step Epoch 15/50 step 55/55 [==============================] - loss: 0.0222 - nme: 3.7174e-04 - 523ms/step Epoch 16/50 step 55/55 [==============================] - loss: 0.0287 - nme: 3.5896e-04 - 523ms/step Epoch 17/50 step 55/55 [==============================] - loss: 0.0185 - nme: 2.6884e-04 - 523ms/step Epoch 18/50 step 55/55 [==============================] - loss: 0.0267 - nme: 3.1695e-04 - 524ms/step Epoch 19/50 step 55/55 [==============================] - loss: 0.0348 - nme: 4.3793e-04 - 520ms/step Epoch 20/50 step 55/55 [==============================] - loss: 0.0154 - nme: 2.3491e-04 - 521ms/step Epoch 21/50 step 55/55 [==============================] - loss: 0.0237 - nme: 3.1029e-04 - 520ms/step Epoch 22/50 step 55/55 [==============================] - loss: 0.0392 - nme: 4.5751e-04 - 523ms/step Epoch 23/50 step 55/55 [==============================] - loss: 0.0617 - nme: 5.2090e-04 - 519ms/step Epoch 24/50 step 55/55 [==============================] - loss: 0.0226 - nme: 3.2028e-04 - 521ms/step Epoch 25/50 step 55/55 [==============================] - loss: 0.0191 - nme: 2.7340e-04 - 521ms/step Epoch 26/50 step 55/55 [==============================] - loss: 0.0183 - nme: 3.1125e-04 - 522ms/step Epoch 27/50 step 55/55 [==============================] - loss: 0.0411 - nme: 4.2144e-04 - 516ms/step Epoch 28/50 step 55/55 [==============================] - loss: 0.0237 - nme: 3.3638e-04 - 519ms/step Epoch 29/50 step 55/55 [==============================] - loss: 0.0232 - nme: 3.0880e-04 - 519ms/step Epoch 30/50 step 55/55 [==============================] - loss: 0.0220 - nme: 3.4524e-04 - 519ms/step Epoch 31/50 step 55/55 [==============================] - loss: 0.0168 - nme: 2.6407e-04 - 521ms/step Epoch 32/50 step 55/55 [==============================] - loss: 0.0319 - nme: 3.6468e-04 - 521ms/step Epoch 33/50 step 55/55 [==============================] - loss: 0.0630 - nme: 6.7493e-04 - 519ms/step Epoch 34/50 step 55/55 [==============================] - loss: 0.0251 - nme: 3.6194e-04 - 523ms/step Epoch 35/50 step 55/55 [==============================] - loss: 0.0154 - nme: 2.4762e-04 - 521ms/step Epoch 36/50 step 55/55 [==============================] - loss: 0.0304 - nme: 4.5266e-04 - 519ms/step Epoch 37/50 step 55/55 [==============================] - loss: 0.0210 - nme: 3.1943e-04 - 517ms/step Epoch 38/50 step 55/55 [==============================] - loss: 0.0243 - nme: 3.4954e-04 - 518ms/step Epoch 39/50 step 55/55 [==============================] - loss: 0.0196 - nme: 2.7806e-04 - 523ms/step Epoch 40/50 step 55/55 [==============================] - loss: 0.0237 - nme: 3.9675e-04 - 518ms/step Epoch 41/50 step 55/55 [==============================] - loss: 0.0296 - nme: 3.3701e-04 - 517ms/step Epoch 42/50 step 55/55 [==============================] - loss: 0.0229 - nme: 3.2235e-04 - 520ms/step Epoch 43/50 step 55/55 [==============================] - loss: 0.0247 - nme: 3.5046e-04 - 520ms/step Epoch 44/50 step 55/55 [==============================] - loss: 0.0434 - nme: 5.1026e-04 - 521ms/step Epoch 45/50 step 55/55 [==============================] - loss: 0.0799 - nme: 6.4841e-04 - 519ms/step Epoch 46/50 step 55/55 [==============================] - loss: 0.0276 - nme: 3.2612e-04 - 518ms/step Epoch 47/50 step 55/55 [==============================] - loss: 0.0415 - nme: 4.3205e-04 - 521ms/step Epoch 48/50 step 55/55 [==============================] - loss: 0.0193 - nme: 2.5496e-04 - 519ms/step Epoch 49/50 step 55/55 [==============================] - loss: 0.0554 - nme: 4.8938e-04 - 524ms/step Epoch 50/50 step 55/55 [==============================] - loss: 0.0262 - nme: 3.5656e-04 - 518ms/step
In [86]
checkpoints_path = './checkpoints/models'
model.save(checkpoints_path)
In [92]
# 定义功能函数
def show_all_keypoints(image, predicted_key_pts):
"""
展示图像,预测关键点
Args:
image:裁剪后的图像 [224, 224, 3]
predicted_key_pts: 预测关键点的坐标
"""
# 展示图像
plt.imshow(image.astype('uint8'))
# 展示关键点
for i in range(0, len(predicted_key_pts), 2):
plt.scatter(predicted_key_pts[i], predicted_key_pts[i+1], s=20, marker='.', c='m')
def visualize_output(test_images, test_outputs, batch_size=1, h=20, w=10):
"""
展示图像,预测关键点
Args:
test_images:裁剪后的图像 [224, 224, 3]
test_outputs: 模型的输出
batch_size: 批大小
h: 展示的图像高
w: 展示的图像宽
"""
if len(test_images.shape) == 3:
test_images = np.array([test_images])
for i in range(batch_size):
plt.figure(figsize=(h, w))
ax = plt.subplot(1, batch_size, i+1)
# 随机裁剪后的图像
image = test_images[i]
# 模型的输出,未还原的预测关键点坐标值
predicted_key_pts = test_outputs[i]
# 还原后的真实的关键点坐标值
predicted_key_pts = predicted_key_pts * data_std + data_mean
# 展示图像和关键点
show_all_keypoints(np.squeeze(image), predicted_key_pts)
plt.axis('off')
plt.show()
In [93]
# 读取图像
img = mpimg.imread('xiaojiejie.jpg')
# 关键点占位符
kpt = np.ones((136, 1))
transform = Compose([Resize(256), RandomCrop(224)])
# 对图像先重新定义大小,并裁剪到 224*224的大小
rgb_img, kpt = transform([img, kpt])
norm = GrayNormalize()
to_chw = ToCHW()
# 对图像进行归一化和格式变换
img, kpt = norm([rgb_img, kpt])
img, kpt = to_chw([img, kpt])
img = np.array([img], dtype='float32')
# 加载保存好的模型进行预测
model = paddle.Model(SimpleNet(key_pts=68))
model.load(checkpoints_path)
model.prepare()
# 预测结果
out = model.predict_batch([img])
out = out[0].reshape((out[0].shape[0], 136, -1))
# 可视化
visualize_output(rgb_img, out, batch_size=1)
当我们得到关键点的信息后,就可以进行一些趣味的应用。
In [94]
# 定义功能函数
def show_fu(image, predicted_key_pts):
"""
展示加了贴纸的图像
Args:
image:裁剪后的图像 [224, 224, 3]
predicted_key_pts: 预测关键点的坐标
"""
# 计算坐标,15 和 34点的中间值
x = (int(predicted_key_pts[28]) + int(predicted_key_pts[66]))//2
y = (int(predicted_key_pts[29]) + int(predicted_key_pts[67]))//2
# 打开 春节小图
star_image = mpimg.imread('light.jpg')
# 处理通道
if(star_image.shape[2] == 4):
star_image = star_image[:,:,1:4]
# 将春节小图放到原图上
image[y:y+len(star_image[0]), x:x+len(star_image[1]),:] = star_image
# 展示处理后的图片
plt.imshow(image.astype('uint8'))
# 展示关键点信息
for i in range(len(predicted_key_pts)//2,):
plt.scatter(predicted_key_pts[i*2], predicted_key_pts[i*2+1], s=20, marker='.', c='m') # 展示关键点信息
def custom_output(test_images, test_outputs, batch_size=1, h=20, w=10):
"""
展示图像,预测关键点
Args:
test_images:裁剪后的图像 [224, 224, 3]
test_outputs: 模型的输出
batch_size: 批大小
h: 展示的图像高
w: 展示的图像宽
"""
if len(test_images.shape) == 3:
test_images = np.array([test_images])
for i in range(batch_size):
plt.figure(figsize=(h, w))
ax = plt.subplot(1, batch_size, i+1)
# 随机裁剪后的图像
image = test_images[i]
# 模型的输出,未还原的预测关键点坐标值
predicted_key_pts = test_outputs[i]
# 还原后的真实的关键点坐标值
predicted_key_pts = predicted_key_pts * data_std + data_mean
# 展示图像和关键点
show_fu(np.squeeze(image), predicted_key_pts)
plt.axis('off')
plt.show()
# 读取图像
img = mpimg.imread('xiaojiejie.jpg')
# 关键点占位符
kpt = np.ones((136, 1))
transform = Compose([Resize(256), RandomCrop(224)])
# 对图像先重新定义大小,并裁剪到 224*224的大小
rgb_img, kpt = transform([img, kpt])
norm = GrayNormalize()
to_chw = ToCHW()
# 对图像进行归一化和格式变换
img, kpt = norm([rgb_img, kpt])
img, kpt = to_chw([img, kpt])
img = np.array([img], dtype='float32')
# 加载保存好的模型进行预测
# model = paddle.Model(SimpleNet())
# model.load(checkpoints_path)
# model.prepare()
# 预测结果
out = model.predict_batch([img])
out = out[0].reshape((out[0].shape[0], 136, -1))
# 可视化
custom_output(rgb_img, out, batch_size=1)