VOC 与 COCO 数据集提取特定类别（总结篇）

VOC提取特定类

import os
import shutil

# Source VOC2012 folders and the destination folders for the filtered subset.
ann_filepath = r'E:\xunleidownload\VOCdevkit\VOC2012\Annotations/'
img_filepath = r'E:\xunleidownload\VOCdevkit\VOC2012\JPEGImages/'
img_savepath = r'E:\xunleidownload\VOCdevkit\cars\JPEGImages/'
ann_savepath = r'E:\xunleidownload\VOCdevkit\cars\Annotations/'

# Object classes to keep; every other <object> block is dropped.
# NOTE(review): the earlier revision also listed 'person' in a second filter,
# but it could never match because it was missing from this list. Add it here
# if person boxes are wanted.
classes = ['bicycle', 'bus', 'car', 'motorbike']

# NOTE(review): the published listing had its XML markup stripped (both block
# markers read "\t\n"). The tags below are restored from the VOC annotation
# layout -- confirm against a real annotation file.
OBJECT_OPEN = '<object>'
OBJECT_CLOSE = '</object>'
NAME_TAG = '<name>%s</name>'


def filter_annotation_lines(lines, wanted):
    """Drop every ``<object>`` block whose class is not in *wanted*.

    Args:
        lines: The annotation file as a list of lines (as from ``readlines``).
        wanted: Iterable of class names to keep.

    Returns:
        A ``(kept_lines, kept_objects)`` tuple: the filtered file content and
        the number of object blocks that survived. Lines outside any object
        block are always kept, in their original order.

    Raises:
        ValueError: If an ``<object>`` block is never closed.
    """
    wanted_tags = {NAME_TAG % cls for cls in wanted}
    kept_lines = []
    kept_objects = 0
    block = None  # lines of the <object> block currently being read, if any
    for line in lines:
        text = line.strip()
        if block is None:
            if text == OBJECT_OPEN:
                block = [line]
            else:
                kept_lines.append(line)
            continue
        block.append(line)
        if text == OBJECT_CLOSE:
            # The class is read from the line right after <object>, so <name>
            # tags of nested elements (e.g. body parts) are never considered.
            if block[1].strip() in wanted_tags:
                kept_lines.extend(block)
                kept_objects += 1
            block = None
    if block is not None:
        raise ValueError('unterminated <object> block')
    return kept_lines, kept_objects


def extract_voc_classes():
    """Copy every annotation/image pair that contains a wanted class.

    Each XML file in ``ann_filepath`` is filtered down to the wanted objects
    and written to ``ann_savepath``; the matching ``.jpg`` is copied to
    ``img_savepath``. Files without any wanted object produce no output.
    """
    # makedirs also creates the missing parent folder, which mkdir would not.
    os.makedirs(img_savepath, exist_ok=True)
    os.makedirs(ann_savepath, exist_ok=True)

    for file in os.listdir(ann_filepath):
        print(file)
        with open(os.path.join(ann_filepath, file)) as fp:
            lines = fp.readlines()

        try:
            kept_lines, kept_objects = filter_annotation_lines(lines, classes)
        except ValueError as err:
            print('skipping %s: %s' % (file, err))
            continue

        ann_savefile = os.path.join(ann_savepath, file)
        if kept_objects == 0:
            # Nothing wanted in this file: make sure no stale copy from an
            # earlier run is left behind, and do not copy the image.
            if os.path.exists(ann_savefile):
                os.remove(ann_savefile)
            continue

        with open(ann_savefile, 'w') as fp_w:
            fp_w.writelines(kept_lines)
        name_img = os.path.join(img_filepath, os.path.splitext(file)[0] + ".jpg")
        shutil.copy(name_img, img_savepath)


if __name__ == '__main__':
    extract_voc_classes()

COCO提取特定类

from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw

# Root folder that receives the VOC-style output converted from COCO.
savepath = r"G:\py_file\OIDv4_ToolKit-master\OID001\new/"
img_dir = '{}JPEGImages/'.format(savepath)
anno_dir = '{}Annotations/'.format(savepath)

# COCO splits to convert. Alternatives used before:
# datasets_list=['train2014', 'val2014']
# datasets_list=['train2014']
datasets_list = ['train2017']

# Category presets that were used before (kept for reference):
# classes_names = ['parking meter']
# classes_names = ['truck']
# classes_names = ["person", "bicycle", "car", "motorcycle","bus"]
#
# All 80 COCO categories:
# classes_names = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
#            "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
#            "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
#            "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
#            "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
#            "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
#            "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
#            "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
#            "hair drier", "toothbrush"]
#
# A 40-category subset:
# classes_names = ["person", "bicycle", "car", "motorcycle","bus", "truck", "boat",
#               "traffic light","stop sign", "parking meter",
#               "cat", "dog","umbrella", "suitcase", "baseball bat",
#               "bottle", "wine glass", "cup","bowl",
#               "banana", "apple","orange", "broccoli", "carrot", "hot dog", "cake",
#               "chair", "couch","bench","potted plant", "bed", "dining table", "tv", "laptop","cell phone",
#               "microwave","refrigerator", "book", "vase", "teddy bear"]

# Categories extracted by this run.
classes_names = [
    "apple",
    "banana",
    "broccoli",
    "carrot",
    "knife",
    "orange",
    "teddy bear",
    "toothbrush",
    "umbrella",
]

# COCO root: holds the annotations folder and the per-split image folders
# (train2014/val2014/...).
dataDir = r'E:\xunleidownload\COCO/'

# VOC annotation templates.
# NOTE(review): the published listing had every XML tag stripped, leaving only
# the element values. The tags below are restored from the PASCAL VOC
# annotation layout, one element per original line -- confirm against a
# reference VOC file.

# File header; placeholders: filename, width, height, depth.
headstr = """\
<annotation>
    <folder>VOC</folder>
    <filename>%s</filename>
    <source>
        <database>My Database</database>
        <annotation>COCO</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <size>
        <width>%d</width>
        <height>%d</height>
        <depth>%d</depth>
    </size>
    <segmented>0</segmented>
"""
# One object entry; placeholders: class name, xmin, ymin, xmax, ymax.
objstr = """\
    <object>
        <name>%s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>%d</xmin>
            <ymin>%d</ymin>
            <xmax>%d</xmax>
            <ymax>%d</ymax>
        </bndbox>
    </object>
"""

# Closes the root element opened by headstr.
tailstr = '''\
</annotation>
'''

def mkr(path):
    """Create *path* as an empty directory, wiping any existing one first.

    Missing parent directories are created as well, so the output root does
    not have to exist beforehand.
    """
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)
# Start every run from empty image and annotation output folders.
for _fresh_dir in (img_dir, anno_dir):
    mkr(_fresh_dir)
def id2name(coco):
    """Return a dict mapping each category id in *coco* to its category name."""
    return {entry['id']: entry['name'] for entry in coco.dataset['categories']}

def write_xml(anno_path,head, objs, tail):
    """Write one annotation file: *head*, one ``objstr`` entry per object, *tail*.

    Args:
        anno_path: Destination path of the XML file (overwritten if present).
        head: Already formatted header text.
        objs: Sequence of ``[class_name, xmin, ymin, xmax, ymax]`` entries.
        tail: Closing text.
    """
    # The context manager closes (and flushes) the file even if a write fails;
    # the handle was previously never closed.
    with open(anno_path, "w") as f:
        f.write(head)
        for obj in objs:
            f.write(objstr%(obj[0],obj[1],obj[2],obj[3],obj[4]))
        f.write(tail)


def save_annotations_and_imgs(coco,dataset,filename,objs):
    """Copy one COCO image to ``img_dir`` and write its annotation to ``anno_dir``.

    Args:
        coco: COCO API handle (not used here; kept for the existing call site).
        dataset: Split folder name under ``dataDir``, e.g. ``'train2017'``.
        filename: Image file name inside that split folder.
        objs: ``[class_name, xmin, ymin, xmax, ymax]`` entries for the image.

    Nothing is written when the image cannot be read or is single-channel.
    """
    #eg:COCO_train2014_000000196610.jpg-->COCO_train2014_000000196610.xml
    # splitext handles extensions of any length (".jpeg" as well as ".jpg").
    anno_path = anno_dir + os.path.splitext(filename)[0] + '.xml'
    img_path = dataDir + dataset + '/' + filename
    dst_imgpath = img_dir + filename
    print(img_path,'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')

    img = cv2.imread(img_path)
    # cv2.imread signals a missing or undecodable file by returning None
    # instead of raising, so check before touching img.shape.
    if img is None:
        print(filename + " could not be read, skipped")
        return
    # NOTE(review): with imread's default colour flag the array presumably
    # always has 3 channels, so this branch may never trigger -- confirm
    # whether grayscale images should really be skipped.
    if img.ndim < 3 or img.shape[2] == 1:
        print(filename + " not a RGB image")
        return

    shutil.copy(img_path, dst_imgpath)

    # shape is (height, width, channels); the header wants width first.
    head = headstr % (filename, img.shape[1], img.shape[0], img.shape[2])
    write_xml(anno_path, head, objs, tailstr)


def showimg(coco,dataset,img,classes,cls_id,show=True):
    """Collect the wanted bounding boxes of one image, optionally displaying them.

    Args:
        coco: COCO API handle used to look up the annotations.
        dataset: Split folder name under ``dataDir``.
        img: COCO image record; its ``'id'`` and ``'file_name'`` are read.
        classes: Mapping of category id to category name.
        cls_id: Category ids used to filter the annotation query.
        show: When true, draw the boxes on the image and display it.

    Returns:
        A list of ``[class_name, xmin, ymin, xmax, ymax]`` entries with integer
        corner coordinates, one per annotation whose class is in
        ``classes_names`` and that carries a ``'bbox'``.
    """
    #Get the annotated information by ID
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None)
    anns = coco.loadAnns(annIds)

    objs = []
    for ann in anns:
        class_name = classes[ann['category_id']]
        if class_name not in classes_names:
            continue
        print(class_name)
        if 'bbox' not in ann:
            continue
        # The bbox is read as [x, y, width, height]; convert it to corners.
        bbox = ann['bbox']
        xmin = int(bbox[0])
        ymin = int(bbox[1])
        xmax = int(bbox[2] + bbox[0])
        ymax = int(bbox[3] + bbox[1])
        objs.append([class_name, xmin, ymin, xmax, ymax])

    if show:
        # Open the image only when it is displayed: the batch conversion calls
        # this with show=False and never uses the pixels.
        with Image.open('%s/%s/%s'%(dataDir,dataset,img['file_name'])) as I:
            draw = ImageDraw.Draw(I)
            for _, xmin, ymin, xmax, ymax in objs:
                draw.rectangle([xmin, ymin, xmax, ymax])
            plt.figure()
            plt.axis('off')
            plt.imshow(I)
            plt.show()

    return objs

# Convert every configured COCO split into VOC-style images + annotations.
for dataset in datasets_list:
    #./COCO/annotations/instances_train2014.json
    annFile='{}/annotations/instances_{}.json'.format(dataDir,dataset)

    # Creating the COCO object parses the JSON file and links the images to
    # their annotations; it prints progress messages while doing so
    # ("loading annotations into memory...", "creating index...").
    coco = COCO(annFile)

    #show all classes in coco
    classes = id2name(coco)
    print(classes)
    # Ids of all wanted categories, e.g. [1, 2, 3, 4, 6, 8]
    classes_ids = coco.getCatIds(catNms=classes_names)
    print(classes_ids)

    # An image that contains several wanted classes is returned once per
    # class. Its annotation already covers all wanted classes (showimg is
    # queried with classes_ids), so converting it once is enough.
    seen_img_ids = set()
    for cls in classes_names:
        #Get ID number of this class
        cls_id=coco.getCatIds(catNms=[cls])
        img_ids=coco.getImgIds(catIds=cls_id)
        print(cls,len(img_ids))
        for imgId in tqdm(img_ids):
            if imgId in seen_img_ids:
                continue
            seen_img_ids.add(imgId)
            img = coco.loadImgs(imgId)[0]
            filename = img['file_name']
            objs=showimg(coco, dataset, img, classes,classes_ids,show=False)
            print(objs)
            save_annotations_and_imgs(coco, dataset, filename, objs)

你可能感兴趣的:(数据处理)