用YOLOV2对垃圾进行目标检测《一》

对于容易制作的数据集应尽量自己制作

一、数据集准备及标注

1. 使用工具:roLabelImg

2. 使用Windows相机进行拍摄,结果如下图

用YOLOV2对垃圾进行目标检测《一》_第1张图片

3. 标注过程

用YOLOV2对垃圾进行目标检测《一》_第2张图片

4. 将数据放入数组中

数组结构:
[
    {'filename': 'D:\\darknet\\img\\1.jpg',
     'width': 640,
     'height': 512,
     'object': [
         {'name': 'metal', 'xmin': 227, 'ymin': 134, 'xmax': 452, 'ymax': 371}

     ]
    },
    {}

]

import os
import xml.etree.ElementTree as ET

def parse_annotation(ann_dir, img_dir, labels=None):
    '''
    Parse the XML annotation files in a directory into per-image records.

    input:
    - ann_dir: directory containing the annotation files; only entries whose
      name ends with ".xml" are parsed, in sorted order.
    - img_dir: directory containing the images; it is joined with the text of
      each <filename> element to build the image path.
    - labels: accepted for backward compatibility with existing callers.
      It is not used: no filtering by class name is performed.

    output:
    - all_imgs: a list with one dictionary per annotation file that contains
      at least one object. Each dictionary holds 'filename', 'width',
      'height' (when the corresponding tags are present) and 'object', a list
      of dictionaries with 'name' and the integer box corners
      'xmin', 'ymin', 'xmax', 'ymax'.
    - seen_labels: a dictionary of
            (key, value) = (the object class, the number of objects found in the images)

    A message is printed (no exception is raised) when a referenced image
    file does not exist.
    '''
    all_imgs = []
    seen_labels = {}
    coord_keys = ('xmin', 'ymin', 'xmax', 'ymax')

    for ann in sorted(os.listdir(ann_dir)):
        # Test the extension rather than the substring "xml", so that files
        # such as "xml_notes.txt" are not fed to the XML parser.
        if not ann.endswith('.xml'):
            continue

        img = {'object': []}
        # os.path.join works whether or not ann_dir ends with a separator.
        tree = ET.parse(os.path.join(ann_dir, ann))

        # Tags are matched by substring, as in the original implementation.
        for elem in tree.iter():
            if 'filename' in elem.tag:
                path_to_image = os.path.join(img_dir, elem.text)
                img['filename'] = path_to_image
                # Only warn about a missing image; the record is still kept.
                if not os.path.exists(path_to_image):
                    print("file does not exist!\n{}".format(path_to_image))
            if 'width' in elem.tag:
                img['width'] = int(elem.text)
            if 'height' in elem.tag:
                img['height'] = int(elem.text)
            if 'object' in elem.tag or 'part' in elem.tag:
                obj = {}
                for attr in elem:
                    if 'name' in attr.tag:
                        obj['name'] = attr.text
                        # The object is registered as soon as its name is
                        # seen; the box corners are filled in on the same
                        # dictionary when <bndbox> is reached.
                        img['object'].append(obj)
                        seen_labels[obj['name']] = seen_labels.get(obj['name'], 0) + 1
                    if 'bndbox' in attr.tag:
                        for dim in attr:
                            for key in coord_keys:
                                if key in dim.tag:
                                    # Coordinates may be written as floats;
                                    # round to the nearest integer pixel.
                                    obj[key] = int(round(float(dim.text)))

        # Annotation files without any object are dropped.
        if img['object']:
            all_imgs.append(img)

    return all_imgs, seen_labels

## Parse annotations
# Both folders end with a path separator. Backslashes are fully escaped:
# sequences such as "\d" and "\i" are invalid escapes and only work by accident.
train_annot_folder = 'D:\\darknet\\tag\\'
train_image_folder = 'D:\\darknet\\img\\'
# Class names present in the dataset.
LABELS = ['metal', 'paper', 'plastic']
train_image, seen_train_labels = parse_annotation(train_annot_folder, train_image_folder, labels=LABELS)
# Report how many annotated images were loaded and show the first record.
print("N train = {}".format(len(train_image)))
print(train_image[0])
>>>N train = 420
>>>{'object': [{'name': 'metal', 'xmin': 227, 'ymin': 134, 'xmax': 452, 'ymax': 371}], 'filename': 'D:\\darknet\\img\\1.jpg', 'width': 640, 'height': 512}

你可能感兴趣的:(yolov2)