import os
import cv2
if __name__ == '__main__':
    # Preview script: for every image id listed in train.txt, draw the boxes
    # of the selected classes on the image and show it until a key is pressed.

    # Raw string: the backslashes in a Windows path must never be parsed as
    # escape sequences (the non-raw form relies on invalid escapes).
    path = r'D:\songjiahao\DATA\WiderPerson/train.txt'
    with open(path, 'r') as f:
        img_ids = f.read().splitlines()

    for img_id in img_ids:  # e.g. '000040'
        img_path = 'D:/songjiahao/DATA/WiderPerson/Images/' + img_id + '.jpg'
        img = cv2.imread(img_path)
        print(img_path)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # instead of raising, so guard before using the image.
            print('cannot read image, skipped: ' + img_path)
            continue

        label_path = img_path.replace('Images', 'Annotations') + '.txt'
        print(label_path)
        with open(label_path) as file:
            # The first line holds the number of annotated boxes; the loop
            # below simply reads to the end of the file, so it is skipped.
            file.readline()
            for line in file:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                cls = int(fields[0])
                print(cls)
                # class_label = 1: pedestrians
                # class_label = 2: riders
                # class_label = 3: partially-visible persons
                # class_label = 4: ignore regions (fake people, e.g. in pictures)
                # class_label = 5: crowd (one large box over a dense group)
                if cls == 1 or cls == 3:
                    xmin, ymin, xmax, ymax = (int(float(v)) for v in fields[1:5])
                    img = cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

        cv2.imshow('result', img)
        cv2.waitKey(0)
数据标注中共有5个类别:1代表行人,2代表骑车的人,3代表被遮挡的行人,4代表假人(例如图画上的人),5代表密集人群。类别5直接用一个大框覆盖整片人群,不太实用,因此去除第4类和第5类,并将剩下的三类统一归为类别1,与CrowdHuman数据集对齐。
import os
from pathlib import Path
from PIL import Image
import csv
import shutil
import math
# coding=utf-8
def check_charset(file_path):
    """Guess the encoding of *file_path* from its first four bytes.

    The bytes are handed to ``chardet.detect`` and the value stored under
    its ``'encoding'`` key is returned unchanged.
    """
    import chardet

    with open(file_path, "rb") as raw:
        head = raw.read(4)
    guess = chardet.detect(head)
    return guess['encoding']
def convert(size, box0, box1, box2, box3):
    """Convert a corner-format box to normalised centre/extent strings.

    The horizontal terms (``box0``, ``box2``) are scaled by ``1 / size[0]``
    and the vertical terms (``box1``, ``box3``) by ``1 / size[1]``.

    The four unformatted numbers are printed, then returned as a tuple of
    strings with seven decimals: (centre_x, centre_y, width, height).
    """
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]

    centre_x = (box0 + box2) / 2 * inv_w
    centre_y = (box1 + box3) / 2 * inv_h
    span_w = (box2 - box0) * inv_w
    span_h = (box3 - box1) * inv_h

    raw = (centre_x, centre_y, span_w, span_h)
    print(*raw)
    return tuple('%.7f' % value for value in raw)
if __name__ == '__main__':
    # Label conversion script: for every image id listed in val.txt, read its
    # annotation file, keep the boxes of classes 1, 2 and 3, write them as
    # class '1' with normalised centre/size coordinates to a per-image label
    # file, and copy the image to the output folder.
    path = r'D:\songjiahao\DATA\WiderPerson/val.txt'
    with open(path, 'r') as f:
        img_ids = f.read().splitlines()

    for img_id in img_ids:  # e.g. '000040'
        img_path = r'D:\songjiahao\DATA\WiderPerson\widerperson\train/' + img_id + '.jpg'
        with Image.open(img_path) as Img:
            img_size = Img.size

        label_path = img_path.replace('train', 'Annotations') + '.txt'
        outpath = r'D:\songjiahao\DATA\WiderPerson\widerperson\labels/' + img_id + '.txt'
        print(outpath)

        rows = []
        with open(label_path, encoding=check_charset(label_path)) as file:
            print(label_path)
            # The first line holds the number of annotated boxes; the loop
            # below reads to the end of the file, so it is skipped.
            file.readline()
            for line in file:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                if int(fields[0]) in (1, 2, 3):
                    xmin, ymin, xmax, ymax = (float(v) for v in fields[1:5])
                    bb = convert(img_size, xmin, ymin, xmax, ymax)
                    # Every kept box is written with the single class id '1'.
                    rows.append('1' + ' ' + ' '.join(str(a) for a in bb) + '\n')

        with open(outpath, 'w') as outfile:
            outfile.writelines(rows)
        shutil.copy(img_path, r'D:\songjiahao\DATA\WiderPerson\widerperson\annotation/' + img_id + '.jpg')
因为test.txt对应的labels并未给出,所以只对train.txt和val.txt中共9000张带有可用labels的图像进行数据清洗和归一化处理。
import shutil
import os
if __name__ == '__main__':
    # For every entry in the label folder, copy the image whose name is the
    # first six characters of that entry plus '.jpg' into the target folder.
    label_path = r"D:\songjiahao\DATA\WiderPerson\widerperson\labels\train/"
    To_imgpath = r'D:\songjiahao\DATA\WiderPerson\widerperson\trainimg/'
    # The image folder is the label folder path with 'labels' -> 'images'.
    image_dir = label_path.replace('labels', 'images')

    label_files = os.listdir(label_path)
    print(len(label_files))

    for n, label_name in enumerate(label_files, start=1):
        stem = label_name[:6]
        img_ids_path = image_dir + stem + '.jpg'
        print(img_ids_path, To_imgpath, n)
        shutil.copy(img_ids_path, To_imgpath)
#第一天学习
#第一天学习
# 根据train.txt和test.txt划分数据集
import os
import shutil
# Source paths: the two id list files and the folders holding the images and
# label files to be distributed. Note that testtxt points at val.txt.
traintxt=r'D:\songjiahao\DATA\数据集\WiderPerson\train.txt'
testtxt=r'D:\songjiahao\DATA\数据集\WiderPerson\val.txt'
srcima=r'D:\songjiahao\DATA\数据集\widerhuman\images\train/'
srctxt=r'D:\songjiahao\DATA\数据集\widerhuman\labels\train/'
# Training split output folders
train_image_path = r'D:\songjiahao\DATA\数据集\WiderPerson\widerPerson划分\images\train/'
train_label_path = r'D:\songjiahao\DATA\数据集\WiderPerson\widerPerson划分\labels\train/'
# Validation split output folders
val_image_path = r'D:\songjiahao\DATA\数据集\WiderPerson\widerPerson划分\images\val/'
val_label_path = r'D:\songjiahao\DATA\数据集\WiderPerson\widerPerson划分\labels\val/'
# Test split output folders
test_image_path = r'D:\songjiahao\DATA\数据集\WiderPerson\widerPerson划分\images\test/'
test_label_path = r'D:\songjiahao\DATA\数据集\WiderPerson\widerPerson划分\labels\test/'
# Split ratios for train / val / test.
# NOTE(review): the original comment stated 75% / 15% / 15%, but the values
# below are 0.7 / 0.1 / 0.1 and do not sum to 1. They are not referenced by
# the code visible in this file; confirm before relying on them.
train_percent = 0.7
val_percent = 0.1
test_percent = 0.1
def mkdir():
    """Create the six split output folders if they do not exist yet.

    Covers the image and label folders of the train, val and test splits,
    taken from the module-level ``*_image_path`` / ``*_label_path`` constants.
    Missing parent folders are created as well.
    """
    targets = (
        train_image_path,
        train_label_path,
        val_image_path,
        val_label_path,
        test_image_path,
        test_label_path,
    )
    for folder in targets:
        # exist_ok=True replaces the separate os.path.exists() check, so there
        # is no window between checking and creating the folder.
        os.makedirs(folder, exist_ok=True)
def main():
    """Copy every image named in the train list into the training image folder.

    Creates the output folders, reads the two id list files, prints how many
    entries each contains, then copies ``<srcima><name>.jpg`` to
    ``<train_image_path><name>.jpg`` for each non-empty entry of the train
    list, printing the source path of each image.

    Raises:
        OSError: if a list file cannot be read or an image cannot be copied.
    """
    mkdir()

    # Context managers close both list files; reusing one variable for two
    # plain open() calls would leave the first handle open.
    with open(traintxt, 'r') as f:
        trainlist = f.readlines()
    with open(testtxt, 'r') as f:
        testlist = f.readlines()
    print("训练集数目:{},测试集数目:{}".format(len(trainlist), len(testlist)))

    for entry in trainlist:
        name = entry.strip()
        if not name:
            # A blank line would otherwise turn into the bogus path '<dir>.jpg'.
            continue
        srcImage = srcima + name + '.jpg'
        print(srcImage)
        shutil.copyfile(srcImage, train_image_path + name + '.jpg')

    # NOTE: only training images are copied. Copying the training labels and
    # copying the images/labels of the second list (testlist) into the test
    # folders was disabled (commented out) in the original script, so testlist
    # is only used for the count printed above.


if __name__ == '__main__':
    main()