VGG Image Annotator (VIA) 是一款开源的图像标注工具,由 Visual Geometry Group 开发。它可以在线和离线使用,可标注矩形、圆、椭圆、多边形、点和线。标注完成后,可以导出为 csv 和 json 文件格式。
mmdetection 等框架中,有的并不直接支持 VIA 格式的标注数据,因此需要写一段转换代码,把 VIA 标注转换成 COCO 格式。
其实就是两个 json 文件之间的转换:两种标注格式读入后都是 dict 格式的数据,我们要做的就是一层一层把 VIA 的 dict 剥开,再放入 COCO format 的 dict 中。
首先可以先格式化输出一下两个json文件,比较差别,需要安装一个包
yum install -y jq
然后
cat a.json | jq
然后就可以写转化的代码了,这里提供我写的版本,亲测好用。
中间有一些小逻辑可以忽略(自己用了,就不改了)
import json
import os
import cv2
import numpy as np
def _get_bbox(all_points_x, all_points_y):
min_x , max_x = min(all_points_x), max(all_points_x)
min_y , max_y = min(all_points_y), max(all_points_y)
bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
return bbox
def _get_mask(all_points_x, all_points_y):
min_x , max_x = min(all_points_x), max(all_points_x)
min_y , max_y = min(all_points_y), max(all_points_y)
for zz in range(len(all_points_x)): #find the biggest in list
if(all_points_x[zz] == max_x):
break
left_lines = []
right_lines = []
mask = []
for zzz in range(len(all_points_x)): # draw lines
if zzz != zz :
if all_points_y[zzz] < all_points_y[zz]:
angle = abs(all_points_x[zzz] - all_points_x[zz])/abs(all_points_y[zzz] - all_points_y[zz] + 1e-7)
#print('left',angle)
left_lines.append([angle,all_points_x[zzz],all_points_y[zzz]])
else:
angle = abs(all_points_x[zzz] - all_points_x[zz])/abs(all_points_y[zzz] - all_points_y[zz] + 1e-7)
#print('right',angle)
right_lines.append([angle,all_points_x[zzz], all_points_y[zzz]])
left_lines.sort()
right_lines.sort(reverse=True)
#print(left_lines)
#print(right_lines)
for line_num in range(len(left_lines)):
mask.append(left_lines[line_num][1])
mask.append(left_lines[line_num][2])
for line_num in range(len(right_lines)):
mask.append(right_lines[line_num][1])
mask.append(right_lines[line_num][2])
mask.append(all_points_x[zz])
mask.append(all_points_y[zz])
return [mask]
def _produce_category_id(data): #整理所有的类别,排序,index作为列表的id
class_set = set()
for i in data:
if(len(data[i]["regions"])!=0):
if('name' in data[i]["regions"][0]["region_attributes"] and data[i]["regions"][0]["region_attributes"]["name"]!='USER'and data[i]["regions"][0]["region_attributes"]["name"]!='' and data[i]["regions"][0]["region_attributes"]["name"]!='ETTODAY' and data[i]["regions"][0]["region_attributes"]["name"]!='SICHUAN'and data[i]["regions"][0]["region_attributes"]["name"]!='RED' and data[i]["regions"][0]["region_attributes"]["name"]!='WEIXIN'): #clean data
class_set.add(data[i]["regions"][0]["region_attributes"]["name"].replace('\n',''))
class_set = list(class_set)
class_set.sort()
#print(class_set)
#np.save('category_id_list.npy',class_set)
return class_set
# ---------------------------------------------------------------------------
# Conversion script: VIA region annotations -> COCO "instances" annotations.
# ---------------------------------------------------------------------------

# VIA annotation file of the custom dataset.
VIA_ANNOTATION_PATH = '/Users/hank/Desktop/ad_12/annotations/via_region_data_train1.json'
# Existing COCO annotation file used as a template: its "images",
# "annotations" and "categories" entries are replaced, every other top-level
# key is written back unchanged.
COCO_TEMPLATE_PATH = '/Users/hank/Desktop/data/coco2017/annotations/instances_val2017.json'
# Directory that holds the image files named in the VIA annotations.
IMAGE_DIR = "/Users/hank/Desktop/ad_12/train/"
# Destination of the converted COCO annotation file.
OUTPUT_PATH = '/Users/hank/Desktop/ad_12/annotations/coco_train1.json'

# An image is dropped from the output if any of its regions has one of
# these labels.
REJECTED_LABELS = ('USER', 'SICHUAN', 'ETTODAY', 'RED', 'WEIXIN')
# WARNING: when True, images that are untagged or carry a rejected label are
# DELETED from disk (this is what the original script did). Set to False to
# only leave them out of the generated annotation file.
DELETE_REJECTED_IMAGES = True


def _load_json(path):
    """Read and parse the JSON file at *path*, closing the file afterwards."""
    with open(path, encoding="utf-8") as json_file:
        return json.load(json_file)


def _discard_image(image_path):
    """Delete *image_path* from disk if DELETE_REJECTED_IMAGES is enabled."""
    if DELETE_REJECTED_IMAGES:
        os.remove(image_path)


def _has_rejected_label(regions, image_name):
    """Return True if any region carries a label from REJECTED_LABELS.

    Prints *image_name* once for every region that has no ``"name"``
    attribute, so that incompletely labelled images can be tracked down.
    """
    rejected = False
    for region in regions:
        attributes = region["region_attributes"]
        if 'name' not in attributes:
            print(image_name)
        elif attributes["name"] in REJECTED_LABELS:
            rejected = True
    return rejected


def _convert_via_to_coco():
    """Convert the VIA annotations to COCO format and write OUTPUT_PATH.

    Images are skipped when the file is missing, when they have no regions,
    when any region has a rejected label, or when OpenCV cannot decode them.
    Regions are skipped when they are not polygons, have no ``"name"``, or
    name a class that is not in the category list.
    """
    old_data = _load_json(VIA_ANNOTATION_PATH)   # custom dataset (VIA) format
    new_data = _load_json(COCO_TEMPLATE_PATH)    # COCO format template

    category_id_list = _produce_category_id(old_data)
    # Category ids are 1-based positions in the sorted class list.
    category_ids = {name: pos + 1 for pos, name in enumerate(category_id_list)}

    img_info = []  # converted "images" entries
    ann_info = []  # converted "annotations" entries
    idx = 0        # image id counter (advances for every image file found)
    ann_idx = 0    # annotation id counter (advances for every region seen)

    for image_key in old_data:
        entry = old_data[image_key]
        image_name = entry["filename"]
        image_path = os.path.join(IMAGE_DIR, image_name)
        if not os.path.exists(image_path):
            # Annotated image is not on disk: ignore it.
            continue
        idx += 1

        regions = entry["regions"]
        if not regions:
            # Untagged image: report it, discard it and do not convert it.
            print(image_path)
            _discard_image(image_path)
            continue

        if _has_rejected_label(regions, image_name):
            _discard_image(image_path)
            continue

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals a decode failure by returning None; without
            # a valid image there is no height/width to record.
            print('unreadable image, skipped:', image_path)
            continue
        h, w = img.shape[:2]

        # Map the image fields onto a COCO "images" entry. The license, URL
        # and date values are fixed placeholders.
        img_info.append({
            "license": 2,
            "coco_url": "http://images.cocodataset.org/val2017/000000500663.jpg",
            "file_name": image_name,
            "id": idx,
            "height": h,
            "width": w,
            "date_captured": "2013-11-17 21:48:19",
            "flickr_url": "http://farm1.staticflickr.com/198/488201322_ef2ebfeccb_z.jpg",
        })

        # Map every region onto a COCO "annotations" entry.
        for region in regions:
            ann_idx += 1
            shape = region["shape_attributes"]
            if shape['name'] != 'polygon':
                # Only polygons carry all_points_x / all_points_y.
                continue
            attributes = region["region_attributes"]
            if 'name' not in attributes:
                continue
            label = attributes["name"].replace('\n', '')
            if label not in category_ids:
                print('unknown category, annotation skipped:', image_name, repr(label))
                continue

            all_points_x = shape["all_points_x"]
            all_points_y = shape["all_points_y"]
            bbox = _get_bbox(all_points_x, all_points_y)
            ann_info.append({
                # NOTE: this is the bounding-box area, not the polygon area.
                'area': bbox[2] * bbox[3],
                'segmentation': _get_mask(all_points_x, all_points_y),
                'iscrowd': 0,
                'image_id': idx,
                'bbox': bbox,
                'category_id': category_ids[label],
                'id': ann_idx,
            })

    # Map the class list onto COCO "categories" entries.
    cat_info = []
    for name, category_id in category_ids.items():
        category_sample = {
            'supercategory': name,
            'id': category_id,
            'name': name,
        }
        print (category_sample)
        cat_info.append(category_sample)

    print('image——num',len(img_info))
    new_data["annotations"] = ann_info
    new_data["images"] = img_info
    new_data["categories"] = cat_info
    print(len(category_id_list))

    # Write the converted annotations to the output JSON file.
    with open(OUTPUT_PATH, 'w') as json_file:
        json.dump(new_data, json_file)


if __name__ == "__main__":
    _convert_via_to_coco()