# P-Net training data generation. Dataset download link:
# https://download.csdn.net/download/zhouzongzong/11290551
# This script has been verified to work.
# coding:utf-8
import os
import cv2
import numpy as np
import numpy.random as npr
def IoU(box, boxes):
    """Compute the IoU between one candidate box and ground-truth boxes.

    Note that the two arguments use different coordinate orders.

    Parameters
    ----------
    box : numpy array, shape (4,)
        Candidate (randomly cropped) box given as x1, y1, x2, y2.
    boxes : numpy array, shape (n, 4) or (4,)
        Ground-truth boxes given as x1, x2, y1, y2. A single box of
        shape (4,) is treated as one row.

    Returns
    -------
    ovr : numpy array, shape (n,)
        IoU of ``box`` with each row of ``boxes``.
    """
    # Accept a single flat box as well as an (n, 4) array.
    boxes = np.asarray(boxes).reshape(-1, 4)

    # Areas are pixel-inclusive, hence the +1 on every side length.
    box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    area = (boxes[:, 1] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 2] + 1)

    # Top-left and bottom-right corners of the overlap rectangle.
    xx1 = np.maximum(box[0], boxes[:, 0])
    yy1 = np.maximum(box[1], boxes[:, 2])
    xx2 = np.minimum(box[2], boxes[:, 1])
    yy2 = np.minimum(box[3], boxes[:, 3])

    # Clamp at zero so disjoint boxes contribute no overlap.
    w = np.maximum(0, xx2 - xx1 + 1)
    h = np.maximum(0, yy2 - yy1 + 1)
    inter = w * h

    # Intersection over union.
    ovr = inter / (box_area + area - inter)
    return ovr
# Inputs: the annotation list (one line per image) and the image directory.
anno_file = "C:/Desktop/train/trainImageList.txt"
im_dir = "C:\\Users\\Desktop\\train"

# Outputs: a root directory plus one sub-directory per sample category.
save_dir = "E:/MTCNN/12"
pos_save_dir = "E:/MTCNN/12/positive"
part_save_dir = "E:/MTCNN/12/part"
neg_save_dir = 'E:/MTCNN/12/negative'

# Create any output directory that does not exist yet (root first).
for _out_dir in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
    if not os.path.exists(_out_dir):
        os.makedirs(_out_dir)

# List files for positive (f1), negative (f2) and part (f3) samples.
f1, f2, f3 = (
    open(os.path.join(save_dir, _list_name), 'w')
    for _list_name in ('pos_12.txt', 'neg_12.txt', 'part_12.txt')
)

# Read every annotation line; the line count is reported as the image count.
with open(anno_file, 'r') as f:
    annotations = f.readlines()
num = len(annotations)
print("%d pics in total" % num)

# Running counters: saved positive / negative / part samples,
# images processed, and boxes processed.
p_idx = n_idx = d_idx = 0
idx = 0
box_idx = 0
# Generate 12x12 negative / positive / part crops for every annotated image.
# The list files f1/f2/f3 are opened before this loop, so they are closed in
# `finally` even when processing an image raises part-way through.
try:
    for annotation in annotations:
        print(annotation)
        # Fields are space separated: image path first, then the face box.
        annotation = annotation.strip().split(' ')
        if len(annotation) < 5:
            # Blank or truncated line: there is no usable box to work with.
            continue
        im_path = annotation[0]
        # Fields 1..4 hold one face box, read below as x1, x2, y1, y2.
        bbox = list(map(float, annotation[1:5]))
        print(bbox)
        boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)

        path = os.path.join(im_dir, im_path)
        path = path.replace('\\', '/')
        print(path)
        img = cv2.imread(os.path.join(im_dir, im_path))
        idx += 1
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            print("skip unreadable image: %s" % path)
            continue
        height, width, channel = img.shape

        # Crop sizes are drawn from [12, max_size); skip images too small
        # for that range to be non-empty.
        max_size = min(width, height) // 2
        if max_size <= 12:
            print("skip image too small for 12px crops: %s" % path)
            continue

        # Keep drawing random square crops until 5 negatives are saved.
        neg_num = 0
        while neg_num < 5:
            size = npr.randint(12, max_size)
            # Top-left corner, chosen so the crop stays inside the image.
            nx = npr.randint(0, width - size)
            ny = npr.randint(0, height - size)
            crop_box = np.array([nx, ny, nx + size, ny + size])
            print(crop_box)
            Iou = IoU(crop_box, boxes)
            # A negative must have IoU below 0.3 with every ground-truth box.
            if np.max(Iou) < 0.3:
                cropped_im = img[ny: ny + size, nx: nx + size, :]
                resized_im = cv2.resize(cropped_im, (12, 12),
                                        interpolation=cv2.INTER_LINEAR)
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                f2.write("%s/%s.jpg 0\n" % (neg_save_dir, n_idx))
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                neg_num += 1

        for box in boxes:
            # Box layout: x_left, x_right, y_top, y_bottom.
            x1, x2, y1, y2 = box
            w = x2 - x1 + 1
            h = y2 - y1 + 1
            # Ignore small faces (their boxes may be inaccurate) and boxes
            # whose top-left corner lies outside the image.
            if max(w, h) < 20 or x1 < 0 or y1 < 0:
                continue

            # Up to 5 more negatives, sampled near the face so they may
            # partially overlap it.
            # NOTE(review): the sampling statements of this loop were lost in
            # the source (comment and code fused into one line). They are
            # reconstructed from the surviving bounds check and the names
            # used below -- confirm against the upstream script.
            for i in range(5):
                size = npr.randint(12, max_size)
                # Offsets of the crop's top-left corner relative to (x1, y1).
                delta_x = npr.randint(int(max(-size, -x1)), int(w))
                delta_y = npr.randint(int(max(-size, -y1)), int(h))
                nx1 = int(max(0, x1 + delta_x))
                ny1 = int(max(0, y1 + delta_y))
                # Drop crops that would extend past the image.
                if nx1 + size > width or ny1 + size > height:
                    continue
                crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])
                Iou = IoU(crop_box, boxes)
                if np.max(Iou) < 0.3:
                    cropped_im = img[ny1: ny1 + size, nx1: nx1 + size, :]
                    resized_im = cv2.resize(cropped_im, (12, 12),
                                            interpolation=cv2.INTER_LINEAR)
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write("%s/%s.jpg 0\n" % (neg_save_dir, n_idx))
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1

            # Positive and part samples: 3 attempts per face.
            for i in range(3):
                # A side shorter than 5 px makes the +/-20% offset range
                # empty, which randint rejects; skip such boxes.
                if min(w, h) < 5:
                    print(w)
                    continue
                # Crop side in [0.8 * min side, 1.25 * max side).
                size = npr.randint(int(min(w, h) * 0.8),
                                   int(np.ceil(1.25 * max(w, h))))
                # Random shift of the crop centre, within 20% of the box size.
                delta_x = npr.randint(int(-w * 0.2), int(w * 0.2))
                delta_y = npr.randint(int(-h * 0.2), int(h * 0.2))
                # Top-left corner: shifted box centre minus half the crop
                # size, clamped at 0.
                nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))
                ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))
                nx2 = nx1 + size
                ny2 = ny1 + size
                # Drop crops that would extend past the image.
                if nx2 > width or ny2 > height:
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])

                # IoU against this face only decides the sample category.
                box_ = box.reshape(1, -1)
                iou = np.max(IoU(crop_box, box_))
                if iou < 0.4:
                    continue

                # Bounding-box regression targets: box corners relative to
                # the crop corners, normalised by the crop size.
                offset_x1 = (x1 - nx1) / float(size)
                offset_y1 = (y1 - ny1) / float(size)
                offset_x2 = (x2 - nx2) / float(size)
                offset_y2 = (y2 - ny2) / float(size)

                cropped_im = img[ny1: ny2, nx1: nx2, :]
                resized_im = cv2.resize(cropped_im, (12, 12),
                                        interpolation=cv2.INTER_LINEAR)
                if iou >= 0.65:
                    # Positive sample, label 1.
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write("%s/%s.jpg 1 %.2f %.2f %.2f %.2f\n" % (
                        pos_save_dir, p_idx,
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                else:
                    # Part sample (0.4 <= IoU < 0.65), label -1.
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write("%s/%s.jpg -1 %.2f %.2f %.2f %.2f\n" % (
                        part_save_dir, d_idx,
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
            box_idx += 1

        if idx % 100 == 0:
            print("%s images done, pos: %s part: %s neg: %s" % (idx, p_idx, d_idx, n_idx))
finally:
    f1.close()
    f2.close()
    f3.close()