The overall directory layout produced is as follows:
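A representative layout (reconstructed from the two scripts below; new_dir/ is the default output folder created by make_dirs):

new_dir/
├── images/
│   ├── train2017/        # images copied in by splitdata_sure.py
│   └── val2017/
├── labels/
│   ├── train2017/        # one x.txt per image x.jpg, written by coco2YOLO.py
│   └── val2017/
├── train2017.txt         # relative image paths for the train split
└── val2017.txt           # relative image paths for the val split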
Inside labels/, each image x.jpg has a matching annotation file x.txt, for example:
1 0.38875 0.229167 0.391875 0.19 0.400625 0.183333 0.396875 0.175 0.405625 0.156667 0.40375 0.150833 0.398125 0.15 0.403125 0.113333 0.4225 0.105833 0.44625 0.1075 0.45375 0.114167 0.461875 0.14 0.461875 0.161667 0.455625 0.173333 0.43625 0.1925 0.436875 0.166667 0.43375 0.160833 0.425 0.160833 0.38875 0.229167 0.4125 0.224167 0.414375 0.201667 0.425 0.1825 0.424375 0.205 0.4125 0.224167 0.455 0.204167 0.44875 0.204167 0.448125 0.198333 0.4575 0.185833 0.455 0.204167
1 0.19125 0.8525 0.196875 0.791667 0.209375 0.786667 0.199375 0.771667 0.196875 0.753333 0.196875 0.715 0.20375 0.690833 0.21875 0.699167 0.22 0.694167 0.23125 0.690833 0.27375 0.734167 0.30125 0.720833 0.31 0.7225 0.309375 0.755 0.338125 0.781667 0.3375 0.785833 0.29 0.7925 0.271875 0.813333 0.265 0.8325 0.25375 0.839167 0.24375 0.839167 0.22875 0.825833 0.21875 0.8425 0.19125 0.8525
The first number on each line is the class id. Because the conversion below is run with use_segments=True, the remaining values are the normalized (x, y) coordinates of the segmentation polygon; with use_segments=False each line would instead hold the box as x_center y_center width height.
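As a quick self-check, here is a minimal sketch of reading one such line back into a class id and a pixel-space polygon (the label path and image size below are hypothetical placeholders):

import numpy as np

w, h = 1024, 768  # hypothetical image size; read it from the actual image in practice
with open('labels/train2017/x.txt') as f:  # hypothetical label path
    for line in f:
        values = line.split()
        cls = int(values[0])                                         # class id
        pts = np.array(values[1:], dtype=np.float32).reshape(-1, 2)  # normalized (x, y) pairs
        print(cls, pts * (w, h))                                     # polygon in pixel coordinates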
Next, let's look at the data-split file train2017.txt.
It contains relative paths, which differs from the earlier VOC conversion, where only the bare file name (without extension) was kept.
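Each line is a relative path of the form ./images/train2017/x.jpg, as written by splitdata_sure.py further below. YOLOv5-style loaders then locate the label file by swapping the images path segment for labels and the suffix for .txt; a rough sketch of that mapping (this helper is illustrative, not part of the scripts here):

import os

def img2label_path(img_path):
    # replace the last '/images/' segment with '/labels/' and the file suffix with '.txt'
    sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep
    return sb.join(img_path.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt'

print(img2label_path('./images/train2017/x.jpg'))  # -> ./labels/train2017/x.txt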
coco2YOLO.py
import json
import cv2
import pandas as pd
from PIL import Image
from utils import *  # JSON2YOLO utils.py: provides glob, os, np, Path, tqdm plus make_dirs, exif_size, split_files, split_rows_simple, write_data_data, coco91_to_coco80_class
# Convert INFOLKS JSON file into YOLO-format labels ----------------------------
def convert_infolks_json(name, files, img_path):
    # Create folders
    path = make_dirs()

    # Import json
    data = []
    for file in glob.glob(files):
        with open(file) as f:
            jdata = json.load(f)
            jdata['json_file'] = file
            data.append(jdata)

    # Write images and shapes
    name = path + os.sep + name
    file_id, file_name, wh, cat = [], [], [], []
    for x in tqdm(data, desc='Files and Shapes'):
        f = glob.glob(img_path + Path(x['json_file']).stem + '.*')[0]
        file_name.append(f)
        wh.append(exif_size(Image.open(f)))  # (width, height)
        cat.extend(a['classTitle'].lower() for a in x['output']['objects'])  # categories

        # filename
        with open(name + '.txt', 'a') as file:
            file.write('%s\n' % f)

    # Write *.names file
    names = sorted(np.unique(cat))
    # names.pop(names.index('Missing product'))  # remove
    with open(name + '.names', 'a') as file:
        [file.write('%s\n' % a) for a in names]

    # Write labels file
    for i, x in enumerate(tqdm(data, desc='Annotations')):
        label_name = Path(file_name[i]).stem + '.txt'

        with open(path + '/labels/' + label_name, 'a') as file:
            for a in x['output']['objects']:
                # if a['classTitle'] == 'Missing product':
                #     continue  # skip

                category_id = names.index(a['classTitle'].lower())

                # The INFOLKS bounding box format is [x-min, y-min, x-max, y-max]
                box = np.array(a['points']['exterior'], dtype=np.float32).ravel()
                box[[0, 2]] /= wh[i][0]  # normalize x by width
                box[[1, 3]] /= wh[i][1]  # normalize y by height
                box = [box[[0, 2]].mean(), box[[1, 3]].mean(), box[2] - box[0], box[3] - box[1]]  # xywh

                if (box[2] > 0.) and (box[3] > 0.):  # if w > 0 and h > 0
                    file.write('%g %.6f %.6f %.6f %.6f\n' % (category_id, *box))

    # Split data into train, test, and validate files
    split_files(name, file_name)
    write_data_data(name + '.data', nc=len(names))
    print('Done. Output saved to %s' % (os.getcwd() + os.sep + path))
# Convert VoTT JSON file into YOLO-format labels -------------------------------
def convert_vott_json(name, files, img_path):
    # Create folders
    path = make_dirs()
    name = path + os.sep + name

    # Import json
    data = []
    for file in glob.glob(files):
        with open(file) as f:
            jdata = json.load(f)
            jdata['json_file'] = file
            data.append(jdata)

    # Get all categories
    file_name, wh, cat = [], [], []
    for i, x in enumerate(tqdm(data, desc='Files and Shapes')):
        try:
            cat.extend(a['tags'][0] for a in x['regions'])  # categories
        except Exception:  # image has no tagged regions
            pass

    # Write *.names file
    names = sorted(pd.unique(cat))
    with open(name + '.names', 'a') as file:
        [file.write('%s\n' % a) for a in names]

    # Write labels file
    n1, n2 = 0, 0
    missing_images = []
    for i, x in enumerate(tqdm(data, desc='Annotations')):
        f = glob.glob(img_path + x['asset']['name'] + '.jpg')
        if len(f):
            f = f[0]
            file_name.append(f)
            wh = exif_size(Image.open(f))  # (width, height)
            n1 += 1
            if (len(f) > 0) and (wh[0] > 0) and (wh[1] > 0):
                n2 += 1

                # append filename to list
                with open(name + '.txt', 'a') as file:
                    file.write('%s\n' % f)

                # write labels file
                label_name = Path(f).stem + '.txt'
                with open(path + '/labels/' + label_name, 'a') as file:
                    for a in x['regions']:
                        category_id = names.index(a['tags'][0])

                        # The VoTT bounding box format is [x-min, y-min, width, height]
                        box = a['boundingBox']
                        box = np.array([box['left'], box['top'], box['width'], box['height']]).ravel()
                        box[[0, 2]] /= wh[0]  # normalize x by width
                        box[[1, 3]] /= wh[1]  # normalize y by height
                        box = [box[0] + box[2] / 2, box[1] + box[3] / 2, box[2], box[3]]  # xywh

                        if (box[2] > 0.) and (box[3] > 0.):  # if w > 0 and h > 0
                            file.write('%g %.6f %.6f %.6f %.6f\n' % (category_id, *box))
        else:
            missing_images.append(x['asset']['name'])

    print('Attempted %g json imports, found %g images, imported %g annotations successfully' % (i + 1, n1, n2))
    if len(missing_images):
        print('WARNING, missing images:', missing_images)

    # Split data into train, test, and validate files
    split_files(name, file_name)
    print('Done. Output saved to %s' % (os.getcwd() + os.sep + path))
# Convert ath JSON file into YOLO-format labels --------------------------------
def convert_ath_json(json_dir):  # dir contains json annotations and images
    # Create folders
    dir = make_dirs()  # output directory

    jsons = []
    for dirpath, dirnames, filenames in os.walk(json_dir):
        for filename in [f for f in filenames if f.lower().endswith('.json')]:
            jsons.append(os.path.join(dirpath, filename))

    # Import json
    n1, n2, n3 = 0, 0, 0
    missing_images, file_name = [], []
    for json_file in sorted(jsons):
        with open(json_file) as f:
            data = json.load(f)

        # # Get classes
        # try:
        #     classes = list(data['_via_attributes']['region']['class']['options'].values())  # classes
        # except:
        #     classes = list(data['_via_attributes']['region']['Class']['options'].values())  # classes
        # # Write *.names file
        # names = pd.unique(classes)  # preserves sort order
        # with open(dir + 'data.names', 'w') as f:
        #     [f.write('%s\n' % a) for a in names]

        # Write labels file
        for i, x in enumerate(tqdm(data['_via_img_metadata'].values(), desc='Processing %s' % json_file)):
            image_file = str(Path(json_file).parent / x['filename'])
            f = glob.glob(image_file)  # image file
            if len(f):
                f = f[0]
                file_name.append(f)
                wh = exif_size(Image.open(f))  # (width, height)
                n1 += 1  # all images
                if len(f) > 0 and wh[0] > 0 and wh[1] > 0:
                    label_file = dir + 'labels/' + Path(f).stem + '.txt'

                    nlabels = 0
                    try:
                        with open(label_file, 'a') as file:  # write labels file
                            for a in x['regions']:
                                # try:
                                #     category_id = int(a['region_attributes']['class'])
                                # except:
                                #     category_id = int(a['region_attributes']['Class'])
                                category_id = 0  # single-class

                                # bounding box format is [x-min, y-min, width, height]
                                box = a['shape_attributes']
                                box = np.array([box['x'], box['y'], box['width'], box['height']],
                                               dtype=np.float32).ravel()
                                box[[0, 2]] /= wh[0]  # normalize x by width
                                box[[1, 3]] /= wh[1]  # normalize y by height
                                box = [box[0] + box[2] / 2, box[1] + box[3] / 2, box[2],
                                       box[3]]  # xywh (left-top to center x-y)

                                if box[2] > 0. and box[3] > 0.:  # if w > 0 and h > 0
                                    file.write('%g %.6f %.6f %.6f %.6f\n' % (category_id, *box))
                                    n3 += 1
                                    nlabels += 1

                        if nlabels == 0:  # remove non-labelled images from dataset
                            os.remove(label_file)
                            # print('no labels for %s' % f)
                            continue  # next file

                        # write image
                        img_size = 4096  # resize to maximum
                        img = cv2.imread(f)  # BGR
                        assert img is not None, 'Image Not Found ' + f
                        r = img_size / max(img.shape)  # size ratio
                        if r < 1:  # downsize if necessary
                            h, w, _ = img.shape
                            img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_AREA)

                        ifile = dir + 'images/' + Path(f).name
                        if cv2.imwrite(ifile, img):  # if success append image to list
                            with open(dir + 'data.txt', 'a') as file:
                                file.write('%s\n' % ifile)
                            n2 += 1  # correct images

                    except Exception:
                        os.remove(label_file)
                        print('problem with %s' % f)
            else:
                missing_images.append(image_file)

    nm = len(missing_images)  # number missing
    print('\nFound %g JSONs with %g labels over %g images. Found %g images, labelled %g images successfully' %
          (len(jsons), n3, n1, n1 - nm, n2))
    if len(missing_images):
        print('WARNING, missing images:', missing_images)

    # Write *.names file
    names = ['knife']  # single class
    with open(dir + 'data.names', 'w') as f:
        [f.write('%s\n' % a) for a in names]

    # Split data into train, test, and validate files
    split_rows_simple(dir + 'data.txt')
    write_data_data(dir + 'data.data', nc=1)
    print('Done. Output saved to %s' % Path(dir).absolute())
def convert_coco_json(json_dir='../coco/annotations/', use_segments=False, cls91to80=False):
    save_dir = make_dirs()  # output directory
    coco80 = coco91_to_coco80_class()

    # Import json
    for json_file in sorted(Path(json_dir).resolve().glob('*.json')):
        fn = Path(save_dir) / 'labels' / json_file.stem.replace('instances_', '')  # folder name
        fn.mkdir(parents=True, exist_ok=True)  # ok if it already exists (safe to re-run)
        with open(json_file) as f:
            data = json.load(f)

        # Create image dict
        images = {'%g' % x['id']: x for x in data['images']}

        # Write labels file
        for x in tqdm(data['annotations'], desc=f'Annotations {json_file}'):
            if x['iscrowd']:
                continue

            img = images['%g' % x['image_id']]
            h, w, f = img['height'], img['width'], img['file_name']

            # The COCO box format is [top left x, top left y, width, height]
            box = np.array(x['bbox'], dtype=np.float64)
            box[:2] += box[2:] / 2  # xy top-left corner to center
            box[[0, 2]] /= w  # normalize x
            box[[1, 3]] /= h  # normalize y

            # Segments
            if use_segments:
                segments = [j for i in x['segmentation'] for j in i]  # all segments concatenated
                s = (np.array(segments).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()

            # Write
            if box[2] > 0 and box[3] > 0:  # if w > 0 and h > 0
                cls = coco80[x['category_id'] - 1] if cls91to80 else x['category_id'] - 1  # class
                line = cls, *(s if use_segments else box)  # cls, box or segments
                with open((fn / f).with_suffix('.txt'), 'a') as file:
                    file.write(('%g ' * len(line)).rstrip() % line + '\n')
if __name__ == '__main__':
    source = 'COCO'

    if source == 'COCO':
        convert_coco_json('COD10K_CAM_coco/annotations', use_segments=True)  # directory with *.json

    elif source == 'infolks':  # Infolks https://infolks.info/
        convert_infolks_json(name='out',
                             files='../data/sm4/json/*.json',
                             img_path='../data/sm4/images/')

    elif source == 'vott':  # VoTT https://github.com/microsoft/VoTT
        convert_vott_json(name='data',
                          files='../../Downloads/athena_day/20190715/*.json',
                          img_path='../../Downloads/athena_day/20190715/')  # images folder

    elif source == 'ath':  # ath format
        convert_ath_json(json_dir='../../Downloads/athena/')  # images folder

    # zip results
    # os.system('zip -r ../coco.zip ../coco')
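After running the conversion, a minimal sanity-check sketch (the sample paths are hypothetical; the image is read from the source dataset folder, since images are only copied into new_dir/ by the split script below) that draws the polygons from one converted label back onto its image:

import cv2
import numpy as np

img_path = 'COD10K_CAM_coco/train2017/x.jpg'  # hypothetical sample from the source dataset
lbl_path = 'new_dir/labels/train2017/x.txt'   # its converted label (new_dir = make_dirs() output)

img = cv2.imread(img_path)
assert img is not None, 'Image Not Found ' + img_path
h, w = img.shape[:2]

with open(lbl_path) as f:
    for line in f:
        values = line.split()  # class id followed by normalized polygon points
        pts = (np.array(values[1:], dtype=np.float32).reshape(-1, 2) * (w, h)).astype(np.int32)
        cv2.polylines(img, [pts], isClosed=True, color=(0, 255, 0), thickness=2)

cv2.imwrite('check.jpg', img)  # inspect check.jpg to confirm the polygons line up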
splitdata_sure.py
#!/usr/bin/env python3
import os
import re
import fnmatch
import shutil

def filter_for_jpeg(root, files):
    file_types = ['*.jpeg', '*.jpg', '*.png']
    file_types = r'|'.join([fnmatch.translate(x) for x in file_types])
    files = [os.path.join(root, f) for f in files]
    files = [f for f in files if re.match(file_types, f)]
    return files

ROOT_DIR = '/home/xyf/AllIn/CAM/COCO2YOLO/COD10K_CAM_coco/'
saved_path = '/home/xyf/AllIn/CAM/COCO2YOLO/new_dir/'

def splitData(part="train"):
    f = open(saved_path + 'train2017.txt', 'w') if part == "train" else open(saved_path + 'val2017.txt', 'w')
    IMAGE_DIR = os.path.join(ROOT_DIR, part + '2017')
    imgSavDir = saved_path + 'images/' + part + '2017/'
    if not os.path.exists(imgSavDir):
        os.makedirs(imgSavDir)
    for root, _, files in os.walk(IMAGE_DIR):
        image_files = filter_for_jpeg(root, files)
        # go through each image
        for image_filename in image_files:
            # skip images that have no converted label file
            if not os.path.exists(saved_path + 'labels/' + part + '2017/' + os.path.basename(image_filename).split('.')[0] + '.txt'):
                print('not exist: ' + os.path.basename(image_filename).split('.')[0] + '.txt')
                continue
            if not os.path.exists(imgSavDir + os.path.basename(image_filename)):
                shutil.copy(image_filename, imgSavDir)
            name = './' + 'images/' + part + '2017/' + os.path.basename(image_filename)
            f.write(name)
            f.write('\n')
    f.close()

if __name__ == "__main__":
    for part in ['train', 'val']:
        splitData(part=part)  # split the data (for a deterministic, non-random split)
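Note that coco2YOLO.py must be run first: splitData only copies an image and records its path when the corresponding label file already exists under new_dir/labels/<part>2017/, so running the scripts in the reverse order would skip every image and produce empty split files.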