上一篇博客是生成P_Net人脸框的训练数据,这一篇博客是生成P_Net人脸关键点的训练数据。
进入prepare_data文件夹,打开gen_landmark_aug_12,代码如下:
# coding: utf-8
import os
import random
from os.path import join, exists
import cv2
import numpy as np
import numpy.random as npr
from BBox_utils import getDataFromTxt, BBox
from Landmark_utils import rotate, flip
from utils import IoU
#首先导入各种包和库
# Input resolution of each network in the cascade.
_NET_INPUT_SIZE = {"PNet": 12, "RNet": 24, "ONet": 48}


def _normalized_landmarks(points, x0, y0, width, height):
    """Return a fresh (5, 2) array of `points` expressed relative to a box.

    Each point (x, y) becomes ((x - x0) / width, (y - y0) / height). A new
    array is allocated on every call so that views stored earlier (via
    ``reshape``) are never overwritten.
    """
    normalized = np.zeros((5, 2))
    for index, one in enumerate(points):
        normalized[index] = ((one[0] - x0) / width, (one[1] - y0) / height)
    return normalized


def _random_shift(extent):
    """Draw an integer offset in [-int(0.2 * extent), int(0.2 * extent)).

    Returns 0 when the range is empty (very narrow boxes), where calling
    ``randint`` with low == high would raise ValueError.
    """
    bound = int(extent * 0.2)
    if bound <= 0:
        return 0
    return npr.randint(-bound, bound)


def _augment_face(img, gt_box, landmarkGt, size, F_imgs, F_landmarks):
    """Append randomly shifted crops of one face, plus flips/rotations.

    Ten square crops are sampled around the ground-truth box. A crop is kept
    only when it lies inside the image and its IoU with the ground-truth box
    exceeds 0.65. Each kept crop may additionally contribute a mirrored copy
    and copies rotated by +5 / -5 degrees (each followed by its mirror), every
    variant being chosen with probability 1/2.
    """
    img_h, img_w = img.shape[:2]
    x1, y1, x2, y2 = gt_box
    gt_w = x2 - x1 + 1
    gt_h = y2 - y1 + 1
    for _ in range(10):
        # Explicit int bounds: do not rely on randint coercing floats.
        bbox_size = npr.randint(int(min(gt_w, gt_h) * 0.8),
                                int(np.ceil(1.25 * max(gt_w, gt_h))))
        delta_x = _random_shift(gt_w)
        delta_y = _random_shift(gt_h)
        nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
        ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))
        nx2 = nx1 + bbox_size
        ny2 = ny1 + bbox_size
        if nx2 > img_w or ny2 > img_h:
            # Crop would leave the image: discard it.
            continue
        crop_box = np.array([nx1, ny1, nx2, ny2])
        cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
        resized_im = cv2.resize(cropped_im, (size, size))
        iou = IoU(crop_box, np.expand_dims(gt_box, 0))
        if not (iou > 0.65):
            continue

        # Landmarks normalised by the (square) crop instead of the gt box.
        F_imgs.append(resized_im)
        F_landmarks.append(
            _normalized_landmarks(landmarkGt, nx1, ny1,
                                  bbox_size, bbox_size).reshape(10))
        landmark_ = F_landmarks[-1].reshape(-1, 2)
        crop_bbox = BBox([nx1, ny1, nx2, ny2])

        # Random horizontal mirror of the crop.
        if random.choice([0, 1]) > 0:
            face_flipped, landmark_flipped = flip(resized_im, landmark_)
            face_flipped = cv2.resize(face_flipped, (size, size))
            F_imgs.append(face_flipped)
            F_landmarks.append(landmark_flipped.reshape(10))

        # Random rotation by +5 and then -5 degrees; each rotated face is
        # also stored mirrored. Rotation works on absolute coordinates, so
        # landmarks are re-projected to the image and projected back.
        for alpha in (5, -5):
            if random.choice([0, 1]) > 0:
                face_rotated, landmark_rotated = rotate(
                    img, crop_bbox,
                    crop_bbox.reprojectLandmark(landmark_), alpha)
                landmark_rotated = crop_bbox.projectLandmark(landmark_rotated)
                face_rotated = cv2.resize(face_rotated, (size, size))
                F_imgs.append(face_rotated)
                F_landmarks.append(landmark_rotated.reshape(10))

                face_flipped, landmark_flipped = flip(face_rotated,
                                                      landmark_rotated)
                face_flipped = cv2.resize(face_flipped, (size, size))
                F_imgs.append(face_flipped)
                F_landmarks.append(landmark_flipped.reshape(10))


def GenerateData(ftxt, data_path, net, argument=False,
                 output_dir=None, dst_dir=None):
    """Generate landmark training crops and their annotation file.

    For every (image path, bounding box, landmarks) record returned by
    ``getDataFromTxt`` the face is cropped, resized to the network input
    size and its five landmarks are normalised by the bounding box. With
    ``argument=True`` additional shifted / mirrored / rotated samples are
    produced. Samples whose normalised landmarks all lie strictly inside
    (0, 1) are written as ``<dst_dir>/<image_id>.jpg`` and listed in
    ``<output_dir>/landmark_<size>_aug.txt`` as
    ``<path> -2 <10 landmark values>``.

    :param ftxt: name/path of the text file that contains image path,
                 bounding box, and landmarks
    :param data_path: data root, forwarded to ``getDataFromTxt``
    :param net: one of "PNet", "RNet", "ONet" (selects size 12 / 24 / 48)
    :param argument: apply augmentation or not. Note that with augmentation
                     enabled, faces whose larger side is below 40 px or whose
                     box starts at negative coordinates are skipped entirely.
    :param output_dir: directory of the annotation file; defaults to the
                       module-level global ``OUTPUT``
    :param dst_dir: directory of the written crops; defaults to the
                    module-level global ``dstdir``
    :return: (images, landmarks) of the LAST processed record only, or
             ``None`` when ``net`` is not recognised
    """
    size = _NET_INPUT_SIZE.get(net)
    if size is None:
        print('Net type error')
        return
    # Backward compatibility: the original read these two module globals.
    if output_dir is None:
        output_dir = OUTPUT
    if dst_dir is None:
        dst_dir = dstdir

    image_id = 0
    idx = 0
    # Defined up front so the final return works for an empty data list.
    F_imgs, F_landmarks = [], []

    # `with` guarantees the annotation file is closed even if a record fails.
    with open(join(output_dir, "landmark_%s_aug.txt" % (size)), 'w') as f:
        # get image path, bounding box, and landmarks from file 'ftxt'
        data = getDataFromTxt(ftxt, data_path=data_path)
        for (imgPath, bbox, landmarkGt) in data:
            F_imgs = []
            F_landmarks = []
            img = cv2.imread(imgPath)
            assert img is not None, "cannot read image: %s" % imgPath
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])

            # Ground-truth face resized to the network input size.
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))
            # Landmarks relative to the ground-truth box.
            landmark = _normalized_landmarks(
                landmarkGt, gt_box[0], gt_box[1],
                gt_box[2] - gt_box[0], gt_box[3] - gt_box[1])
            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))

            if argument:
                idx = idx + 1
                if idx % 100 == 0:
                    print(idx, "images done")
                x1, y1, x2, y2 = gt_box
                gt_w = x2 - x1 + 1
                gt_h = y2 - y1 + 1
                if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                    # Skips the whole record, including the un-augmented face.
                    continue
                _augment_face(img, gt_box, landmarkGt, size,
                              F_imgs, F_landmarks)

            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
            for face, marks in zip(F_imgs, F_landmarks):
                # Keep only samples whose landmarks are strictly in (0, 1).
                if np.any(marks <= 0) or np.any(marks >= 1):
                    continue
                out_path = join(dst_dir, "%d.jpg" % (image_id))
                cv2.imwrite(out_path, face)
                f.write(out_path + " -2 " + " ".join(map(str, marks)) + "\n")
                image_id = image_id + 1
    return F_imgs, F_landmarks
if __name__ == '__main__':
    # `dstdir` and `OUTPUT` are read as module globals by GenerateData:
    # the former receives the cropped images, the latter the annotation file.
    dstdir = "../../DATA/12/train_PNet_landmark_aug"
    OUTPUT = '../../DATA/12'
    data_path = '../../DATA'

    # Create the output directories (parent first) when they are missing.
    for directory in (OUTPUT, dstdir):
        if not exists(directory):
            os.mkdir(directory)
    assert (exists(dstdir) and exists(OUTPUT))

    # Build the training data for the first network of the cascade.
    net = "PNet"
    # The file that lists all the landmark training samples.
    train_txt = "trainImageList.txt"
    imgs, landmarks = GenerateData(train_txt, data_path, net, argument=True)
这里用到了flip、getDataFromTxt和rotate三个函数。输出结果是一个包含图片的文件夹train_PNet_landmark_aug,以及一个记录图片信息的标注文档landmark_12_aug.txt。标注文档的每一行有12个字段:第1个是图片路径,第2个是表示该样本为landmark数据的label值-2,最后10个是人脸关键点的坐标(已按裁剪框归一化)。
下一篇博客将两份数据集合并在一起。