INRIA数据集链接:https://pan.baidu.com/s/1Z2TVvGuuvor7juqj3uPG-g
提取码:07ed
我制作的PascalVOC格式的INRIA数据集链接:https://pan.baidu.com/s/1licLJkXICcwSdAwMb8B0OA
提取码:3rh2
PascalVOC格式数据集由三个部分构成,JPEGImages,Annotations,ImageSets。
JPEGImages:存放的是训练与测试的所有图片。
Annotations:里面存放的是每张图片打完标签所对应的XML文件
ImageSets:里面有个Main文件夹,其中包含存储着图片名称的txt文件,分为训练与测试。
此处参考行人检测数据集汇总
该数据库是目前使用最多的静态行人检测数据库,提供原始图片及相应的标注文件。训练集有正样本614张(包含2416个行人),负样本1218张;测试集有正样本288张(包含1126个行人),负样本453张。图片中人体大部分为站立姿势且高度大于100个像素,部分标注可能不正确。图片主要来源于GRAZ-01、个人照片及Google,因此图片的清晰度较高。在XP操作系统下部分训练或者测试图片无法看清楚,但可用OpenCV正常读取和显示。
(因为第一次制作数据集,为了省事,我直接把INRIA数据集中的70X134H96和96X160H96中的图片全部放到JPEGImages文件夹中了)
import os
from PIL import Image
image_path = "JPEGImages/"  # change this to the directory that holds the images
image_list = os.listdir(image_path)  # names of every entry in that directory

# Convert each PNG in image_path to an RGB JPEG stored next to it, then delete
# the PNG. Entries that are not PNG files are skipped, so existing JPEGs are
# not re-encoded and stray non-image files cannot crash the loop.
for file_name in image_list:
    stem, extension = os.path.splitext(file_name)
    if extension.lower() != ".png":
        continue
    img = os.path.join(image_path, file_name)  # source PNG path
    new_img = os.path.join(image_path, stem + ".jpg")  # target JPEG path
    with Image.open(img) as im:
        # A PNG may carry an alpha channel (RGBA); JPEG only stores RGB.
        im.convert("RGB").save(new_img)
    # Remove the PNG only after the handle is closed and the JPEG is written.
    os.remove(img)
这个地方有点问题:我直接对所有图片进行了批量处理,导致没有提取每张图片各自的特征(下面的代码把所有图片的标注框都统一取为图片四周各缩进5个像素后的区域);如果要进行特征提取,就需要一张张处理图片。
(此处参考博客:Python 提取图像信息保存为TXT、xml格式)
import os
import cv2
class image_processing():
    """Batch helpers for the images of a PascalVOC-style dataset folder.

    Args:
        img_path: directory that holds the jpg images.
        annotations_txt_path: directory the txt listing is written to.
        annotations_xml_path: directory for the xml annotations; it is
            created if it does not exist yet.
    """

    def __init__(self, img_path="./JPEGImages/", annotations_txt_path="./",
                 annotations_xml_path="./Annotations/"):
        self.img_path = img_path
        self.annotations_txt_path = annotations_txt_path
        self.annotations_xml_path = annotations_xml_path
        if not os.path.exists(self.annotations_xml_path):
            os.makedirs(self.annotations_xml_path)

    def rename(self, start=4500):
        """Rename every ``.jpg`` in ``img_path`` to a sequential numeric name.

        Names are ``'00'`` followed by the zero-padded (4 digits) counter,
        e.g. ``004500.jpg`` for the default ``start``.

        Args:
            start: number given to the first renamed image.

        Returns:
            The number of jpg files that received a sequential name.

        Raises:
            FileExistsError: if a target name is already taken by another
                file (renaming would otherwise overwrite it on POSIX).
        """
        img_dir = os.path.abspath(self.img_path)
        # Sorted so the numbering is reproducible; os.listdir order is arbitrary.
        entries = sorted(os.listdir(img_dir))
        total_num = len(entries)
        renamed = 0
        for item in entries:
            if not item.endswith('.jpg'):
                continue
            src = os.path.join(img_dir, item)
            dst = os.path.join(img_dir, '00' + format(start + renamed, '04d') + '.jpg')
            if src != dst:
                if os.path.exists(dst):
                    raise FileExistsError(
                        'refusing to overwrite %s while renaming %s' % (dst, src))
                os.rename(src, dst)
                print('converting %s to %s ...' % (src, dst))
            renamed += 1
        # Report how many files were renamed, not the final counter value.
        print('total %d to rename & converted %d jpgs' % (total_num, renamed))
        return renamed

    def get_image_information(self):
        """Write one line per readable image to ``sex_image_txt.txt``.

        Each line is ``<file name> 3 5 5 <rows - 5> <cols - 5>`` where rows
        and cols are the first two entries of the array shape returned by
        ``cv2.imread``. Entries that OpenCV cannot decode are skipped.
        """
        image_list = sorted(os.listdir(self.img_path))
        print(len(image_list))
        txt_file = os.path.join(self.annotations_txt_path, 'sex_image_txt.txt')
        with open(txt_file, "w") as file_txt:
            for image_name in image_list:
                img = cv2.imread(os.path.join(self.img_path, image_name))
                if img is None:
                    # cv2.imread returns None instead of raising on failure.
                    print('skipping unreadable image %s' % image_name)
                    continue
                image_shape = img.shape
                print(image_shape)
                # NOTE(review): shape[0] (rows) is written before shape[1]
                # (cols); confirm that readers of this file expect that order.
                file_txt.write(
                    image_name + ' 3 5 5 '
                    + str(image_shape[0] - 5) + ' ' + str(image_shape[1] - 5) + '\n')
if __name__ == '__main__':
    # Script entry point: build the helper and dump the image shape listing.
    # Batch renaming is left disabled, exactly as before.
    processor = image_processing()
    # processor.rename()
    processor.get_image_information()
from xml.dom.minidom import Document
import os
from PIL import Image
ann_path = "./sex_image_txt.txt"  # change to the path of the txt listing
img_path = "./JPEGImages/"  # change to the directory of the jpg images
xml_path = "./Annotations/"  # change to the directory the xml files go to
database_name = "INRIAPerson Database"
# Label categories
label_list = ["person"]
# Create the output directory up front; exist_ok avoids the check-then-create
# race and also builds missing parent directories.
os.makedirs(xml_path, exist_ok=True)
def _add_element(doc, parent, tag, text=None):
    """Append a new ``<tag>`` element to ``parent``, optionally with text, and return it."""
    node = doc.createElement(tag)
    parent.appendChild(node)
    if text is not None:
        node.appendChild(doc.createTextNode(str(text)))
    return node


def writeXml(imgname, imgpath, w, h, label_list, wxml, database_name, margin=5):
    """Write a PascalVOC-style annotation file containing one object.

    The single bounding box is the image rectangle shrunk by ``margin``
    pixels on every side; no per-image ground truth is used.

    Args:
        imgname: image file name stored in ``<filename>``.
        imgpath: image path stored in ``<path>``.
        w: image width in pixels.
        h: image height in pixels.
        label_list: class names; only the first entry is used as the
            object name.
        wxml: path of the xml file to write (UTF-8, tab-indented).
        database_name: text stored in ``<folder>`` and ``<source>/<database>``.
        margin: inset of the bounding box from the image border, in pixels.
    """
    doc = Document()
    annotation = _add_element(doc, doc, 'annotation')

    # Image identification
    _add_element(doc, annotation, 'folder', database_name)
    _add_element(doc, annotation, 'filename', imgname)
    _add_element(doc, annotation, 'path', imgpath)

    source = _add_element(doc, annotation, 'source')
    _add_element(doc, source, 'database', database_name)

    # Image size; depth is always written as 3 channels.
    size = _add_element(doc, annotation, 'size')
    _add_element(doc, size, 'width', w)
    _add_element(doc, size, 'height', h)
    _add_element(doc, size, 'depth', "3")

    _add_element(doc, annotation, 'segmented', "0")

    # The one annotated object
    obj = _add_element(doc, annotation, 'object')
    _add_element(doc, obj, 'name', label_list[0])
    # NOTE(review): pose is written as "0"; confirm consumers accept this value.
    _add_element(doc, obj, 'pose', "0")
    _add_element(doc, obj, 'truncated', "0")
    _add_element(doc, obj, 'difficult', "0")

    bndbox = _add_element(doc, obj, 'bndbox')
    _add_element(doc, bndbox, 'xmin', margin)
    _add_element(doc, bndbox, 'ymin', margin)
    _add_element(doc, bndbox, 'xmax', w - margin)
    _add_element(doc, bndbox, 'ymax', h - margin)

    # toprettyxml returns bytes when an encoding is given, hence binary mode.
    with open(wxml, "wb") as f:
        f.write(doc.toprettyxml(indent='\t', encoding='utf-8'))
# Read the txt listing: the first whitespace-separated field of each line is
# an image file name; the remaining fields are not used here.
with open(ann_path, 'r') as f:
    txt_list = f.readlines()

im_name_list = []
for line in txt_list:
    line_split = line.split()
    if not line_split:
        continue  # a blank line would otherwise produce an empty image name
    img_name = line_split[0]
    im_name_list.append(img_name)
    fileimgpath = os.path.join(img_path, img_name)
    # Only the size is needed; the context manager releases the file handle.
    with Image.open(fileimgpath) as im:
        width, height = im.size
    # splitext keeps dots inside the stem ("a.b.jpg" -> "a.b.xml").
    savename = os.path.join(xml_path, os.path.splitext(img_name)[0] + '.xml')
    writeXml(img_name, fileimgpath, width, height, label_list, savename, database_name)
这个文件夹里可以添加其他文件,但只用Main文件就足够运行
Main文件夹下要有三个txt文件,分别是train.txt,val.txt,trainval.txt
train.txt - 训练集图片文件名
val.txt - 验证集图片文件名(本文中把它当作测试集使用)
trainval.txt - 完整数据集图片文件名
此处我把数据集划分的比例为训练集:测试集=2:1
import os
image_path = "JPEGImages/"  # image directory
filename = "trainval.txt"
image_list = os.listdir(image_path)
# trainval.txt lists the name (without extension) of every image.
with open(filename, "w") as f:
    for im in image_list:
        # splitext copes with extensions of any length (".jpeg", none at all),
        # unlike slicing off the last four characters.
        f.write(os.path.splitext(im)[0] + " \n")
import os
image_path = "JPEGImages/"  # image directory
filename = "train.txt"
image_list = os.listdir(image_path)
# train.txt takes two out of every three images: positions 0, 1, 3, 4, ...
# of the directory listing. Every third entry (2, 5, 8, ...) is left out.
with open(filename, "w") as f:
    for position, im in enumerate(image_list):
        if position % 3 == 2:
            continue
        # splitext copes with extensions of any length, unlike im[:-4].
        f.write(os.path.splitext(im)[0] + " \n")
import os
image_path = "JPEGImages/"  # image directory
filename = "val.txt"
image_list = os.listdir(image_path)
# val.txt takes every third image of the directory listing: positions
# 2, 5, 8, ... The remaining two thirds are not written here.
with open(filename, "w") as f:
    for position, im in enumerate(image_list):
        if position % 3 == 2:
            # splitext copes with extensions of any length, unlike im[:-4].
            f.write(os.path.splitext(im)[0] + " \n")