After going around in circles I'm back to using the YOLO framework, this time on video, so I'm writing down some notes.
Since I'm working in Python, the plan is to go through the Python interface that darknet provides.
Reference: https://github.com/pjreddie/darknet/issues/289
1. Modify src/image.c
Insert the following somewhere around lines 537~560; the exact position is flexible.
#ifdef NUMPY
image ndarray_to_image(unsigned char* src, long* shape, long* strides)
{
    int h = shape[0];
    int w = shape[1];
    int c = shape[2];
    int step_h = strides[0];
    int step_w = strides[1];
    int step_c = strides[2];
    image im = make_image(w, h, c);
    int i, j, k;
    int index1, index2 = 0;
    for(i = 0; i < h; ++i){
        for(k = 0; k < c; ++k){
            for(j = 0; j < w; ++j){
                index1 = k*w*h + i*w + j;
                index2 = step_h*i + step_w*j + step_c*k;
                //fprintf(stderr, "w=%d h=%d c=%d step_w=%d step_h=%d step_c=%d \n", w, h, c, step_w, step_h, step_c);
                //fprintf(stderr, "im.data[%d]=%u data[%d]=%f \n", index1, src[index2], index2, src[index2]/255.);
                im.data[index1] = src[index2]/255.;
            }
        }
    }
    rgbgr_image(im);
    return im;
}
#endif
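For context, this function expects an HWC uint8 array (the layout OpenCV frames use) and fills darknet's planar CHW float image, scaled to [0, 1]. The strides are byte offsets, which line up with element indices here because each uint8 element is exactly one byte. A quick sketch of the values that get passed in, assuming a standard BGR frame:

import numpy as np

# dummy 480x640 BGR frame, same layout as cv2.VideoCapture().read() returns
frame = np.zeros((480, 640, 3), dtype=np.uint8)
print(frame.shape)    # (480, 640, 3) -> shape[0]=h, shape[1]=w, shape[2]=c
print(frame.strides)  # (1920, 3, 1)  -> step_h, step_w, step_c in bytes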
2. Then insert the following into src/image.h, around line 22:
#ifdef NUMPY
image ndarray_to_image(unsigned char* src, long* shape, long* strides);
#endif
Don't put it inside the OPENCV block.
3. Modify the Makefile:
GPU=1
CUDNN=1
OPENCV=1
# Added
NUMPY=1
OPENMP=1
DEBUG=1
ARCH= -gencode arch=compute_30,code=sm_30 \
-gencode arch=compute_35,code=sm_35 \
-gencode arch=compute_50,code=[sm_50,compute_50] \
-gencode arch=compute_52,code=[sm_52,compute_52] \
-gencode arch=compute_70,code=[sm_70,compute_70] \
-gencode arch=compute_75,code=[sm_75,compute_75]
# -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated?
# This is what I use, uncomment if you know your arch and want to specify
# ARCH= -gencode arch=compute_52,code=compute_52
VPATH=./src/:./examples
SLIB=libdarknet.so
ALIB=libdarknet.a
EXEC=darknet
OBJDIR=./obj/
CC=gcc
CPP=g++
NVCC=nvcc
AR=ar
ARFLAGS=rcs
OPTS=-Ofast
LDFLAGS= -lm -pthread
COMMON= -Iinclude/ -Isrc/
CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC
ifeq ($(OPENMP), 1)
CFLAGS+= -fopenmp
endif
ifeq ($(DEBUG), 1)
OPTS=-O0 -g
endif
CFLAGS+=$(OPTS)
ifeq ($(OPENCV), 1)
COMMON+= -DOPENCV
CFLAGS+= -DOPENCV
LDFLAGS+= `pkg-config --libs opencv` -lstdc++
COMMON+= `pkg-config --cflags opencv`
endif
# Added
ifeq ($(NUMPY), 1)
COMMON+= -DNUMPY -I/home/lijinlong/anaconda3/envs/tf_models/include/python3.6m/ -I/home/lijinlong/anaconda3/envs/tf_models/lib/python3.6/site-packages/numpy/core/include/numpy/
CFLAGS+= -DNUMPY
endif
ifeq ($(GPU), 1)
COMMON+= -DGPU -I/usr/local/cuda-10.0-cudnn-7.3.1/include/
CFLAGS+= -DGPU
LDFLAGS+= -L/usr/local/cuda-10.0-cudnn-7.3.1/lib64 -lcuda -lcudart -lcublas -lcurand
endif
ifeq ($(CUDNN), 1)
COMMON+= -DCUDNN
CFLAGS+= -DCUDNN
LDFLAGS+= -lcudnn
endif
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o l2norm_layer.o yolo_layer.o iseg_layer.o image_opencv.o
EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o instance-segmenter.o darknet.o
ifeq ($(GPU), 1)
LDFLAGS+= -lstdc++
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o
endif
EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA))
OBJS = $(addprefix $(OBJDIR), $(OBJ))
DEPS = $(wildcard src/*.h) Makefile include/darknet.h
all: obj backup results $(SLIB) $(ALIB) $(EXEC)
#all: obj results $(SLIB) $(ALIB) $(EXEC)
$(EXEC): $(EXECOBJ) $(ALIB)
	$(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(ALIB)
$(ALIB): $(OBJS)
	$(AR) $(ARFLAGS) $@ $^
$(SLIB): $(OBJS)
	$(CC) $(CFLAGS) -shared $^ -o $@ $(LDFLAGS)
$(OBJDIR)%.o: %.cpp $(DEPS)
	$(CPP) $(COMMON) $(CFLAGS) -c $< -o $@
$(OBJDIR)%.o: %.c $(DEPS)
	$(CC) $(COMMON) $(CFLAGS) -c $< -o $@
$(OBJDIR)%.o: %.cu $(DEPS)
	$(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
obj:
	mkdir -p obj
backup:
	mkdir -p backup
results:
	mkdir -p results
.PHONY: clean
clean:
	rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ) $(OBJDIR)/*
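A note on the NUMPY block above: the two -I paths are specific to my Anaconda environment. If you're unsure what they should be on your machine, a small Python helper (not part of the setup itself) prints them:

import sysconfig
import numpy as np

print(sysconfig.get_paths()["include"])  # directory containing Python.h
print(np.get_include())                  # NumPy headers; the Makefile above points at its numpy/ subdirectory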
4. Run make clean, then rebuild with make -j8.
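Before touching the Python side, it's worth a quick sanity check that the new symbol was actually compiled in; a minimal sketch with ctypes, assuming you run it from the darknet root where libdarknet.so was built:

from ctypes import CDLL, RTLD_GLOBAL

lib = CDLL("./libdarknet.so", RTLD_GLOBAL)
# raises AttributeError if the NUMPY=1 build didn't pick up ndarray_to_image
print(lib.ndarray_to_image)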
5. Modify python/darknet.py; I'll just paste the complete program here.
from ctypes import *
import math
import random
import time
import numpy as np
import cv2
import os
import sys
def sample(probs):
    s = sum(probs)
    probs = [a/s for a in probs]
    r = random.uniform(0, 1)
    for i in range(len(probs)):
        r = r - probs[i]
        if r <= 0:
            return i
    return len(probs)-1
def c_array(ctype, values):
    arr = (ctype*len(values))()
    arr[:] = values
    return arr
class BOX(Structure):
    _fields_ = [("x", c_float),
                ("y", c_float),
                ("w", c_float),
                ("h", c_float)]
class DETECTION(Structure):
    _fields_ = [("bbox", BOX),
                ("classes", c_int),
                ("prob", POINTER(c_float)),
                ("mask", POINTER(c_float)),
                ("objectness", c_float),
                ("sort_class", c_int)]
class IMAGE(Structure):
    _fields_ = [("w", c_int),
                ("h", c_int),
                ("c", c_int),
                ("data", POINTER(c_float))]
class METADATA(Structure):
    _fields_ = [("classes", c_int),
                ("names", POINTER(c_char_p))]
#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
lib = CDLL("../libdarknet.so", RTLD_GLOBAL)
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int
predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)
set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]
make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE
get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)]
get_network_boxes.restype = POINTER(DETECTION)
make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)
free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]
free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]
network_predict = lib.network_predict
network_predict.argtypes = [c_void_p, POINTER(c_float)]
reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]
load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p
do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
free_image = lib.free_image
free_image.argtypes = [IMAGE]
letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE
load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA
load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE
# Added for video processing
ndarray_image = lib.ndarray_to_image
ndarray_image.argtypes = [POINTER(c_ubyte), POINTER(c_long), POINTER(c_long)]
ndarray_image.restype = IMAGE
rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]
predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)
def classify(net, meta, im):
    out = predict_image(net, im)
    res = []
    for i in range(meta.classes):
        res.append((meta.names[i], out[i]))
    res = sorted(res, key=lambda x: -x[1])
    return res
"""
Yolo-v3目前耗时过长的步骤
1.输入图像的预处理阶段
2.python接口调用网络执行一次推理过程
"""
def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45):
# preprocess_image_time = time.time()
# 大约0.1131s
im = load_image(image, 0, 0)
# print("Yolo Preprocess image time in python version:", (time.time() - preprocess_image_time))
num = c_int(0)
pnum = pointer(num)
# start_time = time.time()
# 大概0.129秒左右
predict_image(net, im)
# print("Yolo Do inference time in python version:", (time.time() - start_time))
# get_detection_time = time.time()
# 大约0.0022s
dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum)
# print("Yolo Get detections time in python version:", (time.time() - get_detection_time))
num = pnum[0]
# do_nms_time = time.time()
# 可以忽略不计
if (nms): do_nms_obj(dets, num, meta.classes, nms)
# print("Yolo Do nms time in python version:", (time.time() - do_nms_time))
res = []
for j in range(num):
for i in range(meta.classes):
if dets[j].prob[i] > 0:
b = dets[j].bbox
res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h)))
res = sorted(res, key=lambda x: -x[1])
free_image(im)
free_detections(dets, num)
return res
# Added for video processing
def detect_im(net, meta, im, thresh=.5, hier_thresh=.5, nms=.45):
    # to_image_time = time.time()
    # ~0.0012-0.0013 s
    im, image = array_to_image(im)
    # print("to_image time:", (time.time() - to_image_time))
    # rgbgr_image_time = time.time()
    # ~0.0013 s
    rgbgr_image(im)
    # print("rgbgr_image time:", (time.time() - rgbgr_image_time))
    num = c_int(0)
    pnum = pointer(num)
    # do_inference_time = time.time()
    # ~0.083 s
    predict_image(net, im)
    # print("Do inference time:", (time.time() - do_inference_time))
    dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum)
    num = pnum[0]
    if (nms): do_nms_obj(dets, num, meta.classes, nms)
    res = []
    for j in range(num):
        a = dets[j].prob[0:meta.classes]
        if any(a):
            ai = np.array(a).nonzero()[0]
            for i in ai:
                b = dets[j].bbox
                res.append((meta.names[i], dets[j].prob[i],
                            (b.x, b.y, b.w, b.h)))
    res = sorted(res, key=lambda x: -x[1])
    # im.data is owned by the numpy array here, so only free images darknet allocated itself
    if isinstance(image, bytes):
        free_image(im)
    free_detections(dets, num)
    return res
def array_to_image(arr):
    # need to return old values to avoid python freeing memory
    arr = arr.transpose(2, 0, 1)
    c, h, w = arr.shape[0:3]
    arr = np.ascontiguousarray(arr.flat, dtype=np.float32) / 255.0
    data = arr.ctypes.data_as(POINTER(c_float))
    im = IMAGE(w, h, c, data)
    return im, arr
def get_folderImages(folder):
    all_files = os.listdir(folder)
    abs_path = [os.path.join(folder, i) for i in all_files]
    return abs_path
def convertBack(x, y, w, h):
    xmin = int(round(x - (w / 2)))
    xmax = int(round(x + (w / 2)))
    ymin = int(round(y - (h / 2)))
    ymax = int(round(y + (h / 2)))
    return xmin, ymin, xmax, ymax
def init():
    net = load_net("../cfg/yolov3.cfg".encode("utf-8"), "../cfg/yolov3.weights".encode("utf-8"), 0)
    meta = load_meta("../cfg/coco.data".encode("utf-8"))
    return net, meta
def image_processing():
    net, meta = init()
    folder = "images"
    save_folder = "results"
    each_process_time = []
    for image_path in get_folderImages(folder):
        # image is read here only for drawing; detect() loads it again internally
        image = cv2.imread(image_path)
        start_time = time.time()
        r = detect(net, meta, image_path.encode("utf-8"))
        processing_time = time.time() - start_time
        each_process_time.append(processing_time)
        for i in range(len(r)):
            x, y, w, h = r[i][2][0], r[i][2][1], r[i][2][2], r[i][2][3]
            xmin, ymin, xmax, ymax = convertBack(float(x), float(y), float(w), float(h))
            image = cv2.rectangle(
                image,
                (xmin, ymin),
                (xmax, ymax),
                (0, 255, 255),
                2
            )
            cv2.putText(
                image,
                bytes.decode(r[i][0]),
                (xmin, ymin),
                cv2.FONT_HERSHEY_SIMPLEX,
                1.0,
                (0, 0, 255),
                2
            )
        save_path = os.path.join(save_folder, image_path.split('/')[-1].split(".jpg")[0] + "-result.jpg")
        cv2.imwrite(save_path, image)
    average_processing_time = np.mean(each_process_time)
    print("Yolo-v3 COCO Average each Image processing Time:\n")
    print(average_processing_time)
def video_processing():
    set_gpu(7)
    net, meta = init()
    processing_path = "small.mp4"
    cam = cv2.VideoCapture(processing_path)
    total_frames = cam.get(cv2.CAP_PROP_FRAME_COUNT)
    fps = cam.get(cv2.CAP_PROP_FPS)
    frame_size = (int(cam.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    # fourcc = int(cam.get(cv2.CAP_PROP_FOURCC))
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    processing_result_name = processing_path.split(".mp4")[0] + "-result.mp4"
    result = cv2.VideoWriter(processing_result_name, fourcc, fps, frame_size)
    timeF = 1  # run detection on every timeF-th frame
    c = 1
    print("opencv?", cam.isOpened())
    print("fps:", fps)
    print("decode style:", fourcc)
    print("size:", frame_size)
    print("total frames:", total_frames)
    start_total = time.time()
    while True:
        frame_start = time.time()
        _, img = cam.read()
        if (c % timeF == 0 or c == total_frames):
            if img is not None:
                r = detect_im(net, meta, img)
                for i in range(len(r)):
                    x, y, w, h = r[i][2][0], r[i][2][1], r[i][2][2], r[i][2][3]
                    xmin, ymin, xmax, ymax = convertBack(float(x), float(y), float(w), float(h))
                    img = cv2.rectangle(
                        img,
                        (xmin, ymin),
                        (xmax, ymax),
                        (0, 255, 255),
                        1
                    )
                    label_score = "{}:{:.2f}".format(bytes.decode(r[i][0]), r[i][1])
                    cv2.putText(
                        img,
                        label_score,
                        (xmin, ymin),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1.0,
                        (0, 0, 255),
                        1
                    )
                result.write(img)
        else:
            result.write(img)
        c += 1
        if c > total_frames:
            print("Finished Processing!")
            break
        print("processing one frame total time:", (time.time() - frame_start))
        print()
    processing_time = time.time() - start_total
    cam.release()
    result.release()
    # post_compression(processing_result_name)  # removed: this helper is not defined in this file
    print("Yolo-v3 COCO one Video Process Time:\n")
    print(processing_time)
if __name__ == "__main__":
#net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0)
#im = load_image("data/wolf.jpg", 0, 0)
#meta = load_meta("cfg/imagenet1k.data")
#r = classify(net, meta, im)
#print r[:10]
# net = load_net("../cfg/yolov3.cfg".encode("utf-8"), "../cfg/yolov3.weights".encode("utf-8"), 0)
# meta = load_meta("../cfg/coco.data".encode("utf-8"))
# start_time = time.time()
# r = detect(net, meta, "../data/car.jpg".encode("utf-8"))
# print("Inference time:{:.4f}".format(time.time() - start_time))
# print(r)
image_processing()
# video_processing()
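For a quick smoke test of the new detect_im path, something like the following can go in the __main__ block instead (the video path is just a placeholder; adjust it to your setup):

# minimal smoke test for detect_im on a single frame
net, meta = init()
cam = cv2.VideoCapture("videos/test.mp4")  # placeholder path
ok, frame = cam.read()
if ok:
    print(detect_im(net, meta, frame))
cam.release()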
One more addition. With the official GitHub code, the command for running detection on a video,
./darknet detector demo cfg/coco.data cfg/yolov3.cfg cfg/yolov3.weights python/videos/test.mp4
can only display the result and has no way to save it; alternatively,
./darknet detector demo cfg/coco.data cfg/yolov3.cfg cfg/yolov3.weights python/videos/test.mp4 -prefix predictions.mp4
saves the output frame by frame, which isn't great either, so let's modify the source.
1. Modify the show_image function in src/image.c:
int show_image(image p, const char *name, int ms)
{
#ifdef OPENCV
    // int c = show_image_cv(p, name, ms);
    // Added: save the result video instead of displaying it
    int c = save_video(p, name, ms);
    return c;
#else
    fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
    save_image(p, name);
    return -1;
#endif
}
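Note that this swaps the display out entirely: anything that goes through show_image (including ./darknet detector demo) will now write frames into the output video instead of popping up a window.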
2. Add a save_video function in src/image_opencv.hpp:
int save_video(image im, const char* name, int ms)
{
    // static local: initialized on the first call, kept across subsequent calls
    static VideoWriter* video;
    Mat m = image_to_mat(im);
    // imshow(name, m);
    {
        // if the video object is still empty, initialize one
        if(video == NULL){
            const char* output_name = "predictions.avi";
            // create the video writer
            video = new VideoWriter(output_name, VideoWriter::fourcc('M','J','P','G'), 50, Size(im.w, im.h));
            printf("\n DST output_video = %s \n", output_name);
        }
        // write each processed frame
        video->write(m);
        printf("\n cvWriteFrame \n");
    }
    int c = waitKey(ms);
    if (c != -1) c = c % 256;
    return c;
}
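Two caveats with this quick hack: the frame rate is hardcoded to 50 rather than taken from the source video, and the VideoWriter allocated with new is never released, so the output file may not be finalized cleanly when the process exits; adjust both if that matters for your use case.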
3. Run make clean again and then make -j64.
And that's it. Still, hmm... the Python path remains on the slow side; I'll update this post once I figure out how to speed it up.