YOU ONLY LOOK ONCE

数据集准备

使用labelimg工具标记数据（voc格式）

把标记好的xml文件转成txt,转化脚本如下(python2.7)

1, 获取所有图像名
```
import os
# Write the name of every entry in the training-image directory to
# ImageID.txt, one name per line.
dirlist = os.listdir("/home/room/mxj_workspace/data/voc_clock/train_img/")
# The context manager flushes and closes the file even if a write fails.
with open("ImageID.txt", "w") as fp:
    for name in dirlist:
        fp.write(name + "\n")
```
2，提取xml中的坐标和label转化为txt，注意修改label名字和路径，新建好对应的ImageID文件夹，最后把生成的txt拷贝到train_img.

import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join

#sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
#classes = ["black_watch","box_watch","white_watch","light_watch","square_watch","sign","IO_State"]
classes = ["s_box"]


def convert(size, box):
    """Convert a pixel box to normalised (x_center, y_center, width, height).

    size: (image_width, image_height).
    box:  (xmin, xmax, ymin, ymax) -- note the x pair comes first.

    The centre coordinates are shifted by -1 before scaling; width and
    height are not. Every value is multiplied by the reciprocal of the
    matching image dimension.
    """
    img_w, img_h = size[0], size[1]
    scale_x = 1. / img_w
    scale_y = 1. / img_h

    center_x = (box[0] + box[1]) / 2.0 - 1
    center_y = (box[2] + box[3]) / 2.0 - 1
    box_w = box[1] - box[0]
    box_h = box[3] - box[2]

    return (center_x * scale_x, center_y * scale_y, box_w * scale_x, box_h * scale_y)

def convert_annotation (image_id):
    """Convert one VOC-style XML annotation into a darknet label text file.

    Reads  /home/mahxn0/darknet/box/train_xml/<image_id>.xml and writes
    /home/mahxn0/darknet/box/ImageID/<image_id>.txt. One line is written
    per <object> whose <name> is listed in the module-level ``classes``:
    "<class index> <x> <y> <w> <h>", with the box normalised by ``convert``.
    Objects with any other name are skipped.

    Both files are handled by context managers so they are always closed,
    and the XML is parsed before the output file is opened so a malformed
    annotation does not leave an empty label file behind.
    """
    xml_path = '/home/mahxn0/darknet/box/train_xml/%s.xml'%(image_id)
    txt_path = '/home/mahxn0/darknet/box/ImageID/%s.txt'%(image_id)

    with open(xml_path) as in_file:
        root = ET.parse(in_file).getroot()

    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    with open(txt_path, 'w') as out_file:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in classes:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects (xmin, xmax, ymin, ymax), not the usual
            # (xmin, ymin, xmax, ymax) order.
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w,h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

#wd = getcwd()

#for year, image_set in sets:
# Create the directories this script relies on. ImageID/ is where
# convert_annotation() writes its label files; without it every open() there
# fails. img_file/ was already created by the original script and is kept.
for required_dir in ('/home/mahxn0/darknet/box/img_file/',
                     '/home/mahxn0/darknet/box/ImageID/'):
    if not os.path.exists(required_dir):
        os.makedirs(required_dir)

# Read the list of image file names (whitespace separated) and close the file.
with open('/home/mahxn0/darknet/box/ImageID.txt') as id_file:
    image_ids = id_file.read().strip().split()

# Every 10th image goes to the validation list, the rest to the training list.
# The context manager closes both list files even if a conversion fails.
with open('/home/mahxn0/darknet/box/train.list', 'w') as listtr_file, \
     open('/home/mahxn0/darknet/box/val.list', 'w') as listte_file:
    for i, image_id in enumerate(image_ids, 1):
        # splitext drops only the final extension, so names that contain
        # extra dots ("a.b.jpg") keep their full stem.
        stem = os.path.splitext(image_id)[0]
        if i % 10 == 0:
            listte_file.write('/home/mahxn0/darknet/box/train_img/%s.jpg\n' % stem)
        else:
            listtr_file.write('/home/mahxn0/darknet/box/train_img/%s.jpg\n' % stem)
        convert_annotation(stem)

#os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt")
#os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt")


##  yolov3

修改cfg文件关键：3*(classes+5)
找到cfg文件中三处[yolo]层的classes，改成你的检测类别数；每个[yolo]层前面最近的那个卷积层的filters修改为：3*(classes+5)
修改cfg/coco.data的类别数为你自己检测的类别数目,train.list和val.list改为你上面用label.py生成的，coco.names里面修改为你自己的label名字，backup是模型保存的位置

训练

  ./darknet detector train cfg/coco.data cfg/yolov3.cfg darknet53.conv.74 -gpus 0,1,2,3

  Region 23 Avg IOU: 0.331109, Class: 0.552714, Obj: 0.017880, No Obj: 0.021078, .5R: 0.129032, .75R: 0.000000,  count: 62

 219: 5.798628, 26.150927 avg loss, 0.000007 rate, 1.180564 seconds, 42048 images
Loaded: 12.885740 seconds
Region 16 Avg IOU: 0.210043, Class: 0.500716, Obj: 0.037469, No Obj: 0.031145, .5R: 0.000000, .75R: 0.000000,  count: 3
Region 16 Avg IOU: 0.302149, Class: 0.318319, Obj: 0.086097, No Obj: 0.030979, .5R: 0.000000, .75R: 0.000000,  count: 1
Region 16 Avg IOU: 0.203812, Class: 0.335673, Obj: 0.063994, No Obj: 0.031331, .5R: 0.000000, .75R: 0.000000,  count: 1
Region 23 Avg IOU: 0.312156, Class: 0.556277, Obj: 0.012325, No Obj: 0.019171, .5R: 0.120000, .75R: 0.000000,  count: 50
Region 23 Avg IOU: 0.373455, Class: 0.508114, Obj: 0.015595, No Obj: 0.019038, .5R: 0.203390, .75R: 0.000000,  count: 59
Region 23 Avg IOU: 0.344760, Class: 0.490172, Obj: 0.013907, No Obj: 0.019223, .5R: 0.187500, .75R: 0.000000,  count: 48
Region 16 Avg IOU: 0.454259, Class: 0.426787, Obj: 0.027839, No Obj: 0.031548, .5R: 0.000000, .75R: 0.000000,  count: 1
Region 16 Avg IOU: 0.366378, Class: 0.445379, Obj: 0.043471, No Obj: 0.030944, .5R: 0.000000, .75R: 0.000000,  count: 2
Region 16 Avg IOU: -nan, Class: -nan, Obj: -nan, No Obj: 0.030927, .5R: -nan, .75R: -nan,  count: 0
Region 23 Avg IOU: 0.362018, Class: 0.513913, Obj: 0.014860, No Obj: 0.019196, .5R: 0.224138, .75R: 0.000000,  count: 58
Region 23 Avg IOU: 0.278272, Class: 0.531918, Obj: 0.013913, No Obj: 0.019277, .5R: 0.065217, .75R: 0.000000,  count: 46
Region 23 Avg IOU: 0.322512, Class: 0.549836, Obj: 0.016681, No Obj: 0.019718, .5R: 0.102564, .75R: 0.000000,  count: 39

tiny-yolov3

获取预训练模型

从官方地址下载yolov3-tiny.weights
理论上并没有规定提取多少层的特征最合适，这里我们提取前15层当作预训练模型

./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15

---

layer     filters    size              input                output
   0 conv     16  3 x 3 / 1   416 x 416 x   3   ->   416 x 416 x  16 0.150 BF
   1 max          2 x 2 / 2   416 x 416 x  16   ->   208 x 208 x  16 0.003 BF
   2 conv     32  3 x 3 / 1   208 x 208 x  16   ->   208 x 208 x  32 0.399 BF
   3 max          2 x 2 / 2   208 x 208 x  32   ->   104 x 104 x  32 0.001 BF
   4 conv     64  3 x 3 / 1   104 x 104 x  32   ->   104 x 104 x  64 0.399 BF
   5 max          2 x 2 / 2   104 x 104 x  64   ->    52 x  52 x  64 0.001 BF
   6 conv    128  3 x 3 / 1    52 x  52 x  64   ->    52 x  52 x 128 0.399 BF
   7 max          2 x 2 / 2    52 x  52 x 128   ->    26 x  26 x 128 0.000 BF
   8 conv    256  3 x 3 / 1    26 x  26 x 128   ->    26 x  26 x 256 0.399 BF
   9 max          2 x 2 / 2    26 x  26 x 256   ->    13 x  13 x 256 0.000 BF
  10 conv    512  3 x 3 / 1    13 x  13 x 256   ->    13 x  13 x 512 0.399 BF
  11 max          2 x 2 / 1    13 x  13 x 512   ->    13 x  13 x 512 0.000 BF
  12 conv   1024  3 x 3 / 1    13 x  13 x 512   ->    13 x  13 x1024 1.595 BF
  13 conv    256  1 x 1 / 1    13 x  13 x1024   ->    13 x  13 x 256 0.089 BF
  14 conv    512  3 x 3 / 1    13 x  13 x 256   ->    13 x  13 x 512 0.399 BF
  15 conv    255  1 x 1 / 1    13 x  13 x 512   ->    13 x  13 x 255 0.044 BF
  16 yolo
  17 route  13
  18 conv    128  1 x 1 / 1    13 x  13 x 256   ->    13 x  13 x 128 0.011 BF
  19 upsample            2x    13 x  13 x 128   ->    26 x  26 x 128
  20 route  19 8
  21 conv    256  3 x 3 / 1    26 x  26 x 384   ->    26 x  26 x 256 1.196 BF
  22 conv    255  1 x 1 / 1    26 x  26 x 256   ->    26 x  26 x 255 0.088 BF
  23 yolo
Total BFLOPS 5.571 
Loading weights from backup/yolov3-tiny.weights...
 seen 64 
Done!
Saving weights to yolov3-tiny.conv.15

训练：

./darknet detector train cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.conv.15

问题汇总：

1> 同时加载多个模型报错out of memory时，把cfg中的batch和subdivisions都设置为1

2>海康相机的解码：

"rtspsrc location=rtsp://admin:<password>@<camera-ip>:554/h264/ch1/main/av_stream latency=200 ! rtph264depay ! h264parse ! omxh264dec ! videoconvert ! appsink sync=false"

3>刚开始出现nan正常现象，如果全部是nan,是xml生成的txt错误或者label名字错误，查看coco.names，生成的txt确定文件都正确

4>2000张样本迭代2000-5000次最佳，10000样本迭代20000次左右(主要看学习率的下降和数据复杂度)

测试:

-c  index            按照索引打开摄像头
-out_filename *.avi  保存结果到视频文件
-thresh              设置检测置信度
-ext_output < /media/mahxn0/DATA/tool/state3.list > result1.txt 
                     批量测试图片准确度并且显示

python v2调用(已封装成python库)

注意：get_network_boxes函数最后的c_int参数是调节框的准确度的
模型cfg中的batch和subdivisions在测试的时候改成1，否则检测结果会不准确，目前正在查找原因
free image必须打开释放内存

#-*- coding=utf-8 -*-
from ctypes import *
import math
import random
import time
import cv2
import numpy as np
import re
import os
import sys
def sample(probs):
    """Pick an index at random, weighted by the entries of ``probs``.

    The weights are normalised by their sum, one uniform number in [0, 1]
    is drawn, and the weights are subtracted from it in order; the first
    index at which the remainder is <= 0 is returned. If that never
    happens, the last index (len(probs) - 1) is returned.
    """
    total = sum(probs)
    weights = [p/total for p in probs]
    remainder = random.uniform(0, 1)
    for index, weight in enumerate(weights):
        remainder = remainder - weight
        if remainder <= 0:
            return index
    return len(weights)-1

# def c_array(ctype, values):
#     arr = (ctype*len(values))()
#     arr[:] = values
#     return arr

def c_array(ctype, values):
    """Return a ctypes array of ``ctype`` elements initialised from ``values``."""
    array_type = ctype * len(values)
    return array_type(*values)

class BOX(Structure):
    """ctypes struct with four float members: x, y, w, h."""
    _fields_ = [
        ("x", c_float),
        ("y", c_float),
        ("w", c_float),
        ("h", c_float),
    ]


class DETECTION(Structure):
    """ctypes struct for one detection: a BOX plus class count, pointers to
    per-class probabilities and a mask, an objectness value and sort_class."""
    _fields_ = [
        ("bbox", BOX),
        ("classes", c_int),
        ("prob", POINTER(c_float)),
        ("mask", POINTER(c_float)),
        ("objectness", c_float),
        ("sort_class", c_int),
    ]


class IMAGE(Structure):
    """ctypes struct with int members w, h, c and a float pointer ``data``."""
    _fields_ = [
        ("w", c_int),
        ("h", c_int),
        ("c", c_int),
        ("data", POINTER(c_float)),
    ]


class METADATA(Structure):
    """ctypes struct with a class count and a pointer to C strings ``names``."""
    _fields_ = [
        ("classes", c_int),
        ("names", POINTER(c_char_p)),
    ]


#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
# Load the darknet shared library from a fixed absolute path and declare the
# argument/return types of every C function used below. Where no restype is
# assigned, ctypes keeps its default return type for that function.
lib = CDLL("/home/mahxn0/darknet/darknet.so", RTLD_GLOBAL)
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int

# network_predict: (network handle, float*) -> float*
predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)

set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]

# make_image returns an IMAGE struct by value.
make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE

# Nine arguments are declared for get_network_boxes (the last is a c_int),
# so every call must pass all nine.
get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int),c_int]
get_network_boxes.restype = POINTER(DETECTION)

make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)

# free_detections takes the DETECTION pointer and a count.
free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]

free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]

# Second name for the same library symbol that `predict` is bound to above.
network_predict = lib.network_predict
network_predict.argtypes = [c_void_p, POINTER(c_float)]

reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]

# load_network: (cfg path, weights path, int) -> opaque network handle
load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p

# Both NMS functions share the signature (detections, count, classes, threshold).
do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

# IMAGE is passed by value to the functions below.
free_image = lib.free_image
free_image.argtypes = [IMAGE]

letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE

# get_metadata: (.data file path) -> METADATA (class count + names)
load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA

load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE

rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]

predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)

# ndarray_to_image: (unsigned byte buffer, long* shape, long* strides) -> IMAGE
ndarray_image = lib.ndarray_to_image
ndarray_image.argtypes = [POINTER(c_ubyte), POINTER(c_long), POINTER(c_long)]
ndarray_image.restype = IMAGE

#net = load_net("/media/mahxn0/DATA/workspace/src/detectAndRecog/src/yolo_surface/data/robot/robot.cfg", "/media/mahxn0/DATA/workspace/src/detectAndRecog/src/yolo_surface/data/robot/robot_final.weights", 0)
#meta = load_meta("/media/mahxn0/DATA/workspace/src/detectAndRecog/src/yolo_surface/data/robot/robot.data")

# Load the network and its metadata once at import time; the detector class
# below reads these two module-level globals.
# NOTE(review): the paths are plain str literals although argtypes is c_char_p,
# which looks like it targets Python 2 -- confirm before running on Python 3.
net = load_net("/home/mahxn0/darknet/yolo_box/box.cfg", "/home/mahxn0/darknet/yolo_box/backup/box_final.weights", 0)
meta = load_meta("/home/mahxn0/darknet/yolo_box/box.data")

#net = load_net("/home/mahxn0/ROS_workspace/darknet/cfg/yolov3-tiny.cfg", "/home/mahxn0/ROS_workspace/darknet/yolov3-tiny.weights", 0)
#meta = load_meta("/home/mahxn0/ROS_workspace/darknet/cfg/coco.data")
#video =cv2.VideoCapture(0)

class yolo_helmet(object):
    """Run the module-level darknet network (``net`` / ``meta``) on numpy images.

    Both detect methods return a list of tuples
    ``(class_name, probability, left, top, right, bottom)`` sorted by
    descending probability. Box edges are clipped to
    ``[0, width-1]`` x ``[0, height-1]`` of the converted image.
    """

    def __init__(self):
        pass

    def _run_network(self, im, thresh, hier_thresh, nms):
        """Run the network on darknet IMAGE ``im`` and collect its detections.

        Frees the detection buffer before returning (also on error); the
        caller stays responsible for freeing ``im``.
        """
        num = c_int(0)
        pnum = pointer(num)
        predict_image(net, im)
        # get_network_boxes is declared with nine arguments in this file, so
        # the trailing 0 is required -- omitting it makes ctypes raise TypeError.
        dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum, 0)
        num = pnum[0]
        try:
            if nms:
                do_nms_obj(dets, num, meta.classes, nms)
            res = []
            for j in range(num):
                for i in range(meta.classes):
                    if dets[j].prob[i] > 0:
                        # bbox holds centre x/y plus width/height; turn it
                        # into edges and clip them to the image.
                        b = dets[j].bbox
                        left = b.x - b.w/2
                        right = b.x + b.w/2
                        top = b.y - b.h/2
                        bot = b.y + b.h/2
                        if left < 0:
                            left = 0
                        if right > im.w-1:
                            right = im.w-1
                        if top < 0:
                            top = 0
                        if bot > im.h-1:
                            bot = im.h-1
                        res.append((meta.names[i], dets[j].prob[i], left, top, right, bot))
            return sorted(res, key=lambda x: -x[1])
        finally:
            free_detections(dets, num)

    def detect_pic(self, image, thresh=0.3, hier_thresh=.5, nms=.45):
        """Detect objects in a numpy image and return the sorted result list."""
        im = self.nparray_to_image(image)
        try:
            return self._run_network(im, thresh, hier_thresh, nms)
        finally:
            # Release the IMAGE created by nparray_to_image.
            free_image(im)

    def detect(self, image, thresh=.3, hier_thresh=.5, nms=.45):
        """Same as detect_pic, but also prints how long conversion and the
        whole call took (in seconds)."""
        t0 = time.time()
        #rgbgr_image(im)
        im = self.nparray_to_image(image)
        t1 = time.time()
        try:
            res = self._run_network(im, thresh, hier_thresh, nms)
        finally:
            # Release the IMAGE created by nparray_to_image.
            free_image(im)
        t2 = time.time()
        # The original passed the duration as a second print argument, so the
        # placeholder was never filled in.
        print("detect take %.3f s" % (t2 - t0))
        print("array_to_image take %.3f s" % (t1 - t0))
        return res

    def array_to_image(self, arr):
        """Build an IMAGE from an array by transposing axes (2, 0, 1), scaling
        by 1/255 and copying the values into a new c_float array."""
        arr = arr.transpose(2, 0, 1)
        c = arr.shape[0]
        h = arr.shape[1]
        w = arr.shape[2]
        arr = (arr/255.0).flatten()
        data = c_array(c_float, arr)
        im = IMAGE(w, h, c, data)
        return im

    def nparray_to_image(self, img):
        """Convert a numpy array to an IMAGE via the library's ndarray_to_image,
        passing the array's raw buffer, shape and strides."""
        data = img.ctypes.data_as(POINTER(c_ubyte))
        image = ndarray_image(data, img.ctypes.shape, img.ctypes.strides)
        return image

    def getXY(self, i):
        """Return ``i`` converted to int."""
        return int(i)
if __name__ == "__main__":
    # Run the detector on every file in picDir and save a padded crop for
    # each detection whose top-left corner lies beyond (300, 300).
    picDir = '/home/mahxn0/Downloads/20181226/'
    print("path:",picDir)
    filenames = os.listdir(picDir)
    i=0
    r=yolo_helmet()
    for name in filenames:
        filename = os.path.join(picDir,name)
        print(filename)
        image=cv2.imread(filename)
        if image is None:
            # cv2.imread returns None for files it cannot decode; skip them
            # instead of crashing inside detect().
            print("skip unreadable image: %s" % filename)
            continue
        out=r.detect(image)
        print(out)
        for res in out:
            x1, y1, x2, y2 = [r.getXY(v) for v in res[2:6]]
            if x1>300 and y1 > 300:
                # 80-pixel margin around the box; x1, y1 > 300 keeps the
                # slice start positive.
                frame_rect=image[(y1-80):(y2+80),(x1-80):(x2+80)]
                cv2.imwrite('/home/mahxn0/darknet/image/box_rect3/'+str(i)+'.jpg',frame_rect)
            # The counter advances for every detection, saved or not.
            i+=1



    # f_img=None
    # cap=cv2.VideoCapture()
    # cap.open("/media/mahxn0/Mahxn0/M_DataSets/jinan_data/Video/2018-07-07/192.168.0.222_01_20180707150311306.mp4")
#   #  cap.set(3,1280)
#   #  cap.set(4,720)
#   #  cap.set(5,60)
#   #  cap.set(10,-4)
#   #  cap.set(11,40)
#   #  cap.set(12,20)
#   #  cap.set(15,-2)
    # #cap.open('rtsp://admin:<password>@<camera-ip>:554/h264/ch1/main/av_stream')
    # cv2.namedWindow('YOLOV3')
    # r = yolo_helmet()
    # result = None
    # fileindex=0

    # font=cv2.FONT_HERSHEY_SCRIPT_COMPLEX
    # #font = cv2.CAP_PVAPI_PIXELFORMAT_MONO8 # 使用默认字体
    # while(cap.isOpened()):
    #   rect,frame=cap.read()
    #    frame_res=frame
    #   if True:
    #         out = r.detect(frame)
 #  #         cv2.imshow("YOLOV3",frame)
    #         print(out)
    #         for res in out:
    #             x1=r.getXY(res[2])
    #             y1=r.getXY(res[3])
    #             x2=r.getXY(res[4])
    #             y2=r.getXY(res[5])
    #             frame_res=cv2.rectangle(frame, (x1,y1), (x2,y2), (87,255,123),4)
    #             cv2.putText(frame_res, res[0] + ' ' + str(res[1]), (x1,y1), font,1,(0,0,255),2)
    #             frame_rect=frame[x2:y2,x1:y1]
    #             cv2.imwrite("test.jpg",frame_rect)
    #    if  frame_res is None:
    #         print("frame_res is empty")
    #    else:
    #         cv2.imshow("YOLOV3",frame)
    #         cv2.waitKey(1)

pythonv3调用:

from ctypes import *
import math
import random
import cv2
import time
import numpy as np


def sample(probs):
    """Return a random index chosen with weights proportional to ``probs``.

    Normalises the weights by their sum, draws one uniform number in
    [0, 1] and walks the weights, subtracting each in turn; the first
    index where the running value drops to <= 0 wins. Falls back to the
    last index (len(probs) - 1) when the walk finishes without a hit.
    """
    norm = sum(probs)
    scaled = [value/norm for value in probs]
    left_over = random.uniform(0, 1)
    for position, share in enumerate(scaled):
        left_over = left_over - share
        if left_over <= 0:
            return position
    return len(scaled)-1

def c_array(ctype, values):
    """Allocate a ctypes array of ``ctype`` sized to ``values`` and fill it
    by slice assignment."""
    array_type = ctype * len(values)
    filled = array_type()
    filled[:] = values
    return filled

class BOX(Structure):
    """ctypes struct with four float members: x, y, w, h."""
    _fields_ = [
        ("x", c_float),
        ("y", c_float),
        ("w", c_float),
        ("h", c_float),
    ]


class DETECTION(Structure):
    """ctypes struct for one detection: a BOX plus class count, pointers to
    per-class probabilities and a mask, an objectness value and sort_class."""
    _fields_ = [
        ("bbox", BOX),
        ("classes", c_int),
        ("prob", POINTER(c_float)),
        ("mask", POINTER(c_float)),
        ("objectness", c_float),
        ("sort_class", c_int),
    ]


class IMAGE(Structure):
    """ctypes struct with int members w, h, c and a float pointer ``data``."""
    _fields_ = [
        ("w", c_int),
        ("h", c_int),
        ("c", c_int),
        ("data", POINTER(c_float)),
    ]


class METADATA(Structure):
    """ctypes struct with a class count and a pointer to C strings ``names``."""
    _fields_ = [
        ("classes", c_int),
        ("names", POINTER(c_char_p)),
    ]



#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
# Load the darknet shared library from a fixed absolute path and declare the
# argument/return types of every C function used below. Where no restype is
# assigned, ctypes keeps its default return type for that function.
lib = CDLL("/home/mahxn0/darknet/darknet.so", RTLD_GLOBAL)
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int

# network_predict: (network handle, float*) -> float*
predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)

set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]

# make_image returns an IMAGE struct by value.
make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE

# Eight arguments are declared for get_network_boxes in this script.
get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)]
get_network_boxes.restype = POINTER(DETECTION)

make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)

# free_detections takes the DETECTION pointer and a count.
free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]

free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]

# Second name for the same library symbol that `predict` is bound to above.
network_predict = lib.network_predict
network_predict.argtypes = [c_void_p, POINTER(c_float)]

reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]

# load_network: (cfg path, weights path, int) -> opaque network handle
load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p

# Both NMS functions share the signature (detections, count, classes, threshold).
do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

# IMAGE is passed by value to the functions below.
free_image = lib.free_image
free_image.argtypes = [IMAGE]

letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE

# get_metadata: (.data file path) -> METADATA (class count + names)
load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA

load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE

rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]

predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)

# Load the network and its metadata once at import time, using paths relative
# to the working directory (passed as bytes to match the c_char_p argtypes).
# The detector class below reads these two module-level globals.
net = load_net(b"model/yolo_box/box.cfg", b"model/yolo_box/box.weights", 0)
meta = load_meta(b"model/yolo_box/box.data")

class yolo(object):
    """Detector wrapper around the module-level darknet ``net`` and ``meta``."""

    def __init__(self):
        pass

    def convertBack(self,x, y, w, h):
        """Turn a centre/size box into rounded integer corners.

        Returns (xmin, ymin, xmax, ymax).
        """
        left_edge = int(round(x - (w / 2)))
        right_edge = int(round(x + (w / 2)))
        top_edge = int(round(y - (h / 2)))
        bottom_edge = int(round(y + (h / 2)))
        return left_edge, top_edge, right_edge, bottom_edge

    def array_to_image(self,arr):
        """Wrap a numpy image in an IMAGE struct.

        The array is transposed with axes (2, 0, 1), flattened to float32
        and divided by 255. The float buffer is returned alongside the
        IMAGE so the caller keeps it alive while the IMAGE points into it.
        """
        planar = arr.transpose(2,0,1)
        c, h, w = planar.shape[0:3]
        buffer = np.ascontiguousarray(planar.flat, dtype=np.float32) / 255.0
        pixels = buffer.ctypes.data_as(POINTER(c_float))
        return IMAGE(w,h,c,pixels), buffer

    def detect(self,image, thresh=.5, hier_thresh=.5, nms=.45):
        """Run the network on ``image`` and return its detections.

        Each entry is (class_name, probability, left, top, right, bottom)
        with the edges clipped to the image; the list is sorted by
        descending probability.
        """
        im, image = self.array_to_image(image)
        rgbgr_image(im)
        counter = c_int(0)
        counter_ptr = pointer(counter)
        predict_image(net, im)
        dets = get_network_boxes(net, im.w, im.h, thresh,
                                 hier_thresh, None, 0, counter_ptr)
        total = counter_ptr[0]
        if nms:
            do_nms_obj(dets, total, meta.classes, nms)

        found = []
        for j in range(total):
            class_probs = dets[j].prob[0:meta.classes]
            if not any(class_probs):
                continue
            # One result per class whose probability is non-zero.
            for i in np.array(class_probs).nonzero()[0]:
                box = dets[j].bbox
                left = box.x - box.w/2
                right = box.x + box.w/2
                top = box.y - box.h/2
                bot = box.y + box.h/2
                left = 0 if left < 0 else left
                right = im.w-1 if right > im.w-1 else right
                top = 0 if top < 0 else top
                bot = im.h-1 if bot > im.h-1 else bot
                found.append((meta.names[i], dets[j].prob[i],left,top,right,bot))
        found.sort(key=lambda item: -item[1])
        # ``image`` is the buffer returned by array_to_image at this point;
        # the IMAGE is only freed when that value is a bytes object.
        if isinstance(image, bytes):
            free_image(im)
        free_detections(dets, total)
        return found


if __name__ == "__main__":
    # Play board0.mp4, draw every detection on each frame and show it until
    # the video ends or 'q' is pressed.
    cap = cv2.VideoCapture("board0.mp4")
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Use a separate name so the instance does not shadow the ``yolo`` class.
    detector = yolo()
    print("Frames per second using video.get(cv2.CAP_PROP_FPS) : {0}".format(fps))
    cv2.namedWindow("img", cv2.WINDOW_NORMAL)
    while True:
        ret, img = cap.read()
        if not ret:
            # End of stream (or read error): stop instead of spinning forever.
            break

        # detect() already returns corner coordinates
        # (name, prob, left, top, right, bottom), so no centre/size
        # conversion is needed here.
        for name, prob, left, top, right, bot in detector.detect(img):
            pt1 = (int(round(left)), int(round(top)))
            pt2 = (int(round(right)), int(round(bot)))
            cv2.rectangle(img, pt1, pt2, (0, 255, 0), 2)
            cv2.putText(img, name.decode() + " [" + str(round(prob * 100, 2)) + "]", (pt1[0], pt1[1] + 20), cv2.FONT_HERSHEY_SIMPLEX, 1, [0, 255, 0], 4)
        cv2.imshow("img", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

yolov4

自动给xml打标签

最近新出yolov4,发现之前的python版本会报错,发现是函数的参数变了,修改以后可以了,放上代码
调用yolov4训练的模型对车辆行人非机动车自动打标签:

#-*- coding=utf-8 -*-
from ctypes import *
import math
import random
import time
import cv2
import numpy as np
import re
import os
import sys
def sample(probs):
    """Pick an index at random, weighted by the entries of ``probs``.

    The weights are normalised by their sum, one uniform number in [0, 1]
    is drawn, and the weights are subtracted from it in order; the first
    index at which the remainder is <= 0 is returned. If that never
    happens, the last index (len(probs) - 1) is returned.
    """
    total = sum(probs)
    weights = [p/total for p in probs]
    remainder = random.uniform(0, 1)
    for index, weight in enumerate(weights):
        remainder = remainder - weight
        if remainder <= 0:
            return index
    return len(weights)-1

# def c_array(ctype, values):
#     arr = (ctype*len(values))()
#     arr[:] = values
#     return arr

def c_array(ctype, values):
    """Return a ctypes array of ``ctype`` elements initialised from ``values``."""
    array_type = ctype * len(values)
    return array_type(*values)

class BOX(Structure):
    """ctypes struct with four float members: x, y, w, h."""
    _fields_ = [
        ("x", c_float),
        ("y", c_float),
        ("w", c_float),
        ("h", c_float),
    ]


class DETECTION(Structure):
    """ctypes struct for one detection: a BOX plus class count, pointers to
    per-class probabilities and a mask, objectness, sort_class, a float
    pointer ``uc`` and an int ``points``."""
    _fields_ = [
        ("bbox", BOX),
        ("classes", c_int),
        ("prob", POINTER(c_float)),
        ("mask", POINTER(c_float)),
        ("objectness", c_float),
        ("sort_class", c_int),
        ("uc", POINTER(c_float)),
        ("points", c_int),
    ]


class IMAGE(Structure):
    """ctypes struct with int members w, h, c and a float pointer ``data``."""
    _fields_ = [
        ("w", c_int),
        ("h", c_int),
        ("c", c_int),
        ("data", POINTER(c_float)),
    ]


class METADATA(Structure):
    """ctypes struct with a class count and a pointer to C strings ``names``."""
    _fields_ = [
        ("classes", c_int),
        ("names", POINTER(c_char_p)),
    ]


#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
# Load the darknet shared library from a fixed absolute path and declare the
# argument/return types of every C function used below. Where no restype is
# assigned, ctypes keeps its default return type for that function.
lib = CDLL("/home/training/mxj/darknet/darknet.so", RTLD_GLOBAL)
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int

# network_predict: (network handle, float*) -> float*
predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)

set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]

# make_image returns an IMAGE struct by value.
make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE

# Nine arguments are declared for get_network_boxes (the last is a c_int),
# so every call must pass all nine.
get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int),c_int]
get_network_boxes.restype = POINTER(DETECTION)

make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)

# free_detections takes the DETECTION pointer and a count.
free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]

free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]

# Second name for the same library symbol that `predict` is bound to above.
network_predict = lib.network_predict
network_predict.argtypes = [c_void_p, POINTER(c_float)]

reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]

# load_network: (cfg path, weights path, int) -> opaque network handle
load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p

# Both NMS functions share the signature (detections, count, classes, threshold).
do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

# IMAGE is passed by value to the functions below.
free_image = lib.free_image
free_image.argtypes = [IMAGE]

letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE

# get_metadata: (.data file path) -> METADATA (class count + names)
load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA

load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE

rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]

predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)

# ndarray_to_image: (unsigned byte buffer, long* shape, long* strides) -> IMAGE
ndarray_image = lib.ndarray_to_image
ndarray_image.argtypes = [POINTER(c_ubyte), POINTER(c_long), POINTER(c_long)]
ndarray_image.restype = IMAGE

#net = load_net("/media/mahxn0/DATA/workspace/src/detectAndRecog/src/yolo_surface/data/robot/robot.cfg", "/media/mahxn0/DATA/workspace/src/detectAndRecog/src/yolo_surface/data/robot/robot_final.weights", 0)
#meta = load_meta("/media/mahxn0/DATA/workspace/src/detectAndRecog/src/yolo_surface/data/robot/robot.data")

# Load the yolov4 network and the COCO metadata once at import time; the
# detector class below reads these two module-level globals.
# NOTE(review): the paths are plain str literals although argtypes is c_char_p,
# which looks like it targets Python 2 -- confirm before running on Python 3.
net = load_net("/home/training/mxj/darknet/cfg/yolov4.cfg", "/home/training/mxj/darknet/yolov4.weights", 0)
meta = load_meta("/home/training/mxj/darknet/cfg/coco.data")

#net = load_net("/home/mahxn0/darknet/watch_shunhua/watch.cfg", "/home/mahxn0/darknet/watch_shunhua/watch_final.weights", 0)
#meta = load_meta("/home/mahxn0/darknet/watch_shunhua/watch.data")

#net = load_net('/home/mahxn0/darknet/cfg/yolov3.cfg', '/home/mahxn0/darknet/yolov3.weights', 0)
#meta = load_meta("/home/mahxn0/darknet/cfg/coco.data")
#net = load_net("/home/mahxn0/ROS_workspace/darknet/cfg/yolov3-tiny.cfg", "/home/mahxn0/ROS_workspace/darknet/yolov3-tiny.weights", 0)
#meta = load_meta("/home/mahxn0/ROS_workspace/darknet/cfg/coco.data")
#video =cv2.VideoCapture(0)
class yolo_helmet(object):
    """Run the module-level darknet network (``net`` / ``meta``) and keep only
    vehicle / person detections.

    Both detect methods return a list of tuples
    ``(class_name, probability, left, top, right, bottom)`` sorted by
    descending probability, restricted to the classes in ``_WANTED_CLASSES``
    and clipped to ``[0, width-1]`` x ``[0, height-1]``.
    """

    # Class names that are kept; everything else the model reports is dropped.
    _WANTED_CLASSES = ('bicycle', 'motorbike', 'car', 'bus', 'truck', 'person')

    def __init__(self):
        # The original had ``pass`` at the wrong indentation, which made the
        # whole class a syntax error.
        pass

    def _run_network(self, im, thresh, hier_thresh, nms, pad):
        """Run the network on darknet IMAGE ``im`` and collect its detections.

        ``pad`` is added to every side of each box before clipping. The
        detection buffer is freed before returning (also on error); the
        caller stays responsible for freeing ``im``.
        """
        num = c_int(0)
        pnum = pointer(num)
        predict_image(net, im)
        dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum, 0)
        num = pnum[0]
        try:
            if nms:
                do_nms_obj(dets, num, meta.classes, nms)
            res = []
            for j in range(num):
                for i in range(meta.classes):
                    if dets[j].prob[i] > 0:
                        name = meta.names[i]
                        # c_char_p yields bytes on Python 3; compare as text.
                        if not isinstance(name, str):
                            name = name.decode()
                        if name not in self._WANTED_CLASSES:
                            continue
                        # bbox holds centre x/y plus width/height.
                        b = dets[j].bbox
                        left = (b.x - b.w/2) - pad
                        right = (b.x + b.w/2) + pad
                        top = (b.y - b.h/2) - pad
                        bot = (b.y + b.h/2) + pad
                        if left < 0:
                            left = 0
                        if right > im.w-1:
                            right = im.w-1
                        if top < 0:
                            top = 0
                        if bot > im.h-1:
                            bot = im.h-1
                        res.append((name, dets[j].prob[i], left, top, right, bot))
            return sorted(res, key=lambda x: -x[1])
        finally:
            free_detections(dets, num)

    def detect_pic(self, image, thresh=0.25, hier_thresh=.5, nms=.45):
        """Detect objects in a numpy image; boxes are not padded."""
        im = self.nparray_to_image(image)
        try:
            return self._run_network(im, thresh, hier_thresh, nms, 0)
        finally:
            # Release the IMAGE created by nparray_to_image.
            free_image(im)

    def detect(self, imagename, thresh=.25, hier_thresh=.5, nms=.45):
        """Read the image file ``imagename`` and detect objects in it.

        Every box is grown by 5 pixels on each side before clipping, and
        the elapsed times are printed. Raises IOError when the file cannot
        be read as an image.
        """
        t0 = time.time()
        image = cv2.imread(imagename)
        if image is None:
            # cv2.imread signals failure by returning None.
            raise IOError("cannot read image: %s" % imagename)
        im = self.nparray_to_image(image)
        t1 = time.time()
        try:
            res = self._run_network(im, thresh, hier_thresh, nms, 5)
        finally:
            # These frees were commented out in the original, so every call
            # leaked the image and detection buffers.
            free_image(im)
        t2 = time.time()
        # The original passed the duration as a second print argument, so the
        # placeholder was never filled in.
        print("detect take %.3f s" % (t2 - t0))
        print("array_to_image take %.3f s" % (t1 - t0))
        return res

    #def merge_iou(self,result_lists):
    def compute_iou(self, rect1, rect2):
        """Return the intersection-over-union of two (x1, y1, x2, y2) boxes.

        Returns 0 when the boxes do not overlap.
        """
        x1 = max(rect1[0], rect2[0])
        x2 = min(rect1[2], rect2[2])
        y1 = max(rect1[1], rect2[1])
        y2 = min(rect1[3], rect2[3])
        if x2 <= x1 or y2 <= y1:
            return 0
        intersect = (x2-x1)*(y2-y1)
        area1 = (rect1[2]-rect1[0])*(rect1[3]-rect1[1])
        area2 = (rect2[2]-rect2[0])*(rect2[3]-rect2[1])
        # float() keeps the ratio from being truncated to 0 by integer
        # division when the coordinates are ints under Python 2.
        return float(intersect)/(area1+area2-intersect)

    def array_to_image(self, arr):
        """Build an IMAGE from an array by transposing axes (2, 0, 1), scaling
        by 1/255 and copying the values into a new c_float array."""
        arr = arr.transpose(2, 0, 1)
        c = arr.shape[0]
        h = arr.shape[1]
        w = arr.shape[2]
        arr = (arr/255.0).flatten()
        data = c_array(c_float, arr)
        im = IMAGE(w, h, c, data)
        return im

    def nparray_to_image(self, img):
        """Convert a numpy array to an IMAGE via the library's ndarray_to_image,
        passing the array's raw buffer, shape and strides."""
        data = img.ctypes.data_as(POINTER(c_ubyte))
        image = ndarray_image(data, img.ctypes.shape, img.ctypes.strides)
        return image

    def getXY(self, i):
        """Return ``i`` converted to int."""
        return int(i)
if __name__ == "__main__":
    # Auto-label every .png in imgDir: run the detector and write the result
    # next to the image as a Pascal-VOC style XML file.
    # NOTE(review): the XML tag names below follow the Pascal VOC / labelImg
    # layout; the literal tags were missing from this copy of the script, so
    # confirm them against a file produced by labelImg.
    from xml.sax.saxutils import escape

    # Map detector class names to the label written into the XML.
    label_map = {
        'bicycle': 'nonMoto',
        'motorbike': 'nonMoto',
        'car': 'car',
        'bus': 'car',
        'truck': 'car',
        'person': 'person',
    }

    imgDir = '/home/training/mxj/1/'
    filenames = os.listdir(imgDir)
    r=yolo_helmet()
    for name in filenames:
        # Match the extension exactly so name[:-3]/name[:-4] strip the right part.
        if not name.endswith('.png'):
            continue
        imgName = os.path.join(imgDir,name)
        print(imgName)
        img = cv2.imread(imgName)
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print("skip unreadable image: %s" % imgName)
            continue
        xmlName = os.path.join(imgDir,name[:-3]+'xml')
        rets = r.detect(imgName)
        if len(rets)>0:
            # The with-block closes the file; no explicit close() is needed.
            with open(xmlName,'w') as f:
                f.write('<annotation>\n')
                f.write('\t<folder>yf</folder>\n')
                f.write('\t<filename>'+escape(name[:-4])+'</filename>\n')
                f.write('\t<path>'+escape(imgName)+'</path>\n')
                f.write('\t<source>\n')
                f.write('\t\t<database>Unknown</database>\n')
                f.write('\t</source>\n')
                f.write('\t<size>\n')
                f.write('\t\t<width>'+str(img.shape[1])+'</width>\n')
                f.write('\t\t<height>'+str(img.shape[0])+'</height>\n')
                f.write('\t\t<depth>'+str(img.shape[2])+'</depth>\n')
                f.write('\t</size>\n')
                f.write('\t<segmented>0</segmented>\n')

                for obj in rets:
                    retClass = obj[0]
                    # c_char_p yields bytes on Python 3; compare as text.
                    if not isinstance(retClass, str):
                        retClass = retClass.decode()
                    # The original test contained a bare 'car' operand and was
                    # therefore always true; use a real membership test.
                    label = label_map.get(retClass)
                    if label is None:
                        continue
                    left = int(obj[2])
                    top = int(obj[3])
                    right = int(obj[4])
                    bottom = int(obj[5])
                    f.write('\t<object>\n')
                    f.write('\t\t<name>'+label+'</name>\n')
                    f.write('\t\t<pose>Unspecified</pose>\n')
                    f.write('\t\t<truncated>0</truncated>\n')
                    f.write('\t\t<difficult>0</difficult>\n')
                    f.write('\t\t<bndbox>\n')
                    f.write('\t\t\t<xmin>'+str(left)+'</xmin>\n')
                    f.write('\t\t\t<ymin>'+str(top)+'</ymin>\n')
                    f.write('\t\t\t<xmax>'+str(right)+'</xmax>\n')
                    f.write('\t\t\t<ymax>'+str(bottom)+'</ymax>\n')
                    f.write('\t\t</bndbox>\n')
                    f.write('\t</object>\n')
                f.write('</annotation>\n')

yolov系列持续更新

YOU ONLY LOOK ONCE

数据集准备

训练

tiny-yolov3

获取预训练模型

训练：

问题汇总：

测试:

python v2调用(已封装成python库)

pythonv3调用:

yolov4

自动给xml打标签

你可能感兴趣的:(yolov系列持续更新)