YOLO跌倒检测笔记
代码来源:
https://github.com/qiaoguan/Fall-detection/blob/master/demo.gif
介绍:
上面的原始代码是用python2/opencv2写的,迁移到python3和新版opencv后,部分函数和常量的名称会发生变化。模型是在darknet(C语言版本)的基础上训练的:首先利用makefile文件生成可执行文件及库文件(动态库.so/静态库.a)。使用命令“sudo make”编译时可能会遇到编译错误,一般与makefile开头的GPU和CUDNN两个开关有关:如果ubuntu中安装了cuda和cudnn,则设置为1,否则设置为0。设置为0时编译一般没有什么问题;设置为1时如果出现问题,一般是nvcc的调用路径不对导致的,把NVCC=nvcc更改为NVCC=/usr/local/cuda-9.0/bin/nvcc即可,具体更改后的makefile见下文。用opencv加载图像时或后面训练好后识别时,针对python2升级python3的问题,传给darknet的路径必须是bytes类型,要在路径前加b,或者在后面加.encode("utf-8")。
如果遇到cuda的问题,一般是安装版本的问题,安装过程网上有很多教程。安装完成后,一般需要添加bin目录、库文件路径和头文件路径这几个环境变量:
cd ~ 进入用户主目录
vi .bashrc
export PATH="/usr/local/cuda-9.0/bin:$PATH" 可执行文件
export CPATH="/usr/local/cuda-9.0/include:$CPATH" 头文件
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-9.0/lib64 库文件
source .bashrc 激活
查看cuda版本:两种方式 驱动api和运行api版本
驱动api版本:nvidia-smi
运行api版本: cat /usr/local/cuda/version.txt
nvcc -V
查看cudnn版本: cat /usr/local/cuda/include/cudnn.h | grep CUDNN_MAJOR -A 2
nvcc --version
libcudart.so.8.0: cannot open shared object file: No such file or directory 的解决办法_volcano_Lin 的博客-CSDN博客
成功解决AttributeError: module 'cv2.cv2' has no attribute 'CV_CAP_PROP_FPS'和 'CV_CAP_PROP_FRAME_WIDTH'_一个处女座的程序猿-CSDN博客
Opencv中FOURCC详解_持久决心的博客-CSDN博客_fourcc
Python opencv 调用摄像头时设置以MJPG等编码格式获取视频_向死而生zzz的博客-CSDN博客
解决ctypes.ArgumentError: argument 1: <class ‘TypeError‘>: wrong type_门前大橋下丶-CSDN博客
/bin/sh: 1: nvcc: not found Makefile:89: recipe for target 'obj/convolutional_kernels.o' failed_hunzhangzui9837的博客-CSDN博客
pypi
Search results · PyPI
yolo
YOLO: Real-Time Object Detection
ps:
makefile
# Build script for darknet: produces the `darknet` executable plus the
# static (libdarknet.a) and shared (libdarknet.so) libraries.

# Feature switches: 1 enables, 0 disables.
# Set GPU/CUDNN to 1 only when CUDA / cuDNN are installed.
GPU=1
CUDNN=1
OPENCV=1
OPENMP=0
DEBUG=0

# Root of the CUDA toolkit. It is the single place to edit when CUDA is
# installed elsewhere, and it can be overridden without touching this file:
#   make CUDA_HOME=/usr/local/cuda
CUDA_HOME ?= /usr/local/cuda-9.0

ARCH= -gencode arch=compute_30,code=sm_30 \
      -gencode arch=compute_35,code=sm_35 \
      -gencode arch=compute_50,code=[sm_50,compute_50] \
      -gencode arch=compute_52,code=[sm_52,compute_52]
#      -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated?

# This is what I use, uncomment if you know your arch and want to specify
# ARCH= -gencode arch=compute_52,code=compute_52

VPATH=./src/:./examples
SLIB=libdarknet.so
ALIB=libdarknet.a
EXEC=darknet
OBJDIR=./obj/

CC=gcc
# NVCC=nvcc
# Full path to nvcc, so the build does not fail with "nvcc: not found" when
# nvcc is not on PATH. With the default CUDA_HOME this expands to
# /usr/local/cuda-9.0/bin/nvcc.
NVCC=$(CUDA_HOME)/bin/nvcc
AR=ar
ARFLAGS=rcs
OPTS=-Ofast
LDFLAGS= -lm -pthread
COMMON= -Iinclude/ -Isrc/
CFLAGS=-Wall -Wno-unknown-pragmas -Wfatal-errors -fPIC

ifeq ($(OPENMP), 1)
CFLAGS+= -fopenmp
endif

# A debug build replaces the optimisation flags with -O0 -g.
ifeq ($(DEBUG), 1)
OPTS=-O0 -g
endif

CFLAGS+=$(OPTS)

ifeq ($(OPENCV), 1)
COMMON+= -DOPENCV
CFLAGS+= -DOPENCV
LDFLAGS+= `pkg-config --libs opencv`
COMMON+= `pkg-config --cflags opencv`
endif

ifeq ($(GPU), 1)
COMMON+= -DGPU -I$(CUDA_HOME)/include/
CFLAGS+= -DGPU
LDFLAGS+= -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lcublas -lcurand
endif

ifeq ($(CUDNN), 1)
COMMON+= -DCUDNN
CFLAGS+= -DCUDNN
LDFLAGS+= -lcudnn
endif

OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o
EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o attention.o darknet.o
# The CUDA kernels are only compiled and linked for GPU builds.
ifeq ($(GPU), 1)
LDFLAGS+= -lstdc++
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o
endif

EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA))
OBJS = $(addprefix $(OBJDIR), $(OBJ))
DEPS = $(wildcard src/*.h) Makefile include/darknet.h

#all: obj backup results $(SLIB) $(ALIB) $(EXEC)
all: obj results $(SLIB) $(ALIB) $(EXEC)

$(EXEC): $(EXECOBJ) $(ALIB)
	$(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(ALIB)

$(ALIB): $(OBJS)
	$(AR) $(ARFLAGS) $@ $^

$(SLIB): $(OBJS)
	$(CC) $(CFLAGS) -shared $^ -o $@ $(LDFLAGS)

$(OBJDIR)%.o: %.c $(DEPS)
	$(CC) $(COMMON) $(CFLAGS) -c $< -o $@

$(OBJDIR)%.o: %.cu $(DEPS)
	$(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@

obj:
	mkdir -p obj
backup:
	mkdir -p backup
results:
	mkdir -p results

# `all` and `clean` name actions, not files.
.PHONY: all clean

clean:
	rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ)
gq1.py
#Author: qiaoguan(https://github.com/qiaoguan)
import cv2
import sys
sys.path.append('python')
import darknet as dn
import time
def array_to_image(arr):
    """Wrap a height x width x channel pixel array in a darknet ``IMAGE``.

    The array is moved to channel-first order, divided by 255.0 and
    flattened into a C float buffer; the returned ``dn.IMAGE`` carries the
    width, height, channel count and that buffer.
    """
    channel_first = arr.transpose(2, 0, 1)
    c, h, w = channel_first.shape
    scaled = (channel_first / 255.0).flatten()
    buffer = dn.c_array(dn.c_float, scaled)
    return dn.IMAGE(w, h, c, buffer)
def detect(net, meta, image, thresh=.24, hier_thresh=.5, nms=.45):
    """Run the network on an already-built ``IMAGE`` and list the detections.

    Returns a list of ``(class_name, probability, (x, y, w, h))`` tuples,
    one for every (box, class) pair whose probability is above zero,
    ordered from the highest probability to the lowest.
    """
    box_array = dn.make_boxes(net)
    prob_rows = dn.make_probs(net)
    box_count = dn.num_boxes(net)
    dn.network_detect(net, image, thresh, hier_thresh, nms, box_array, prob_rows)

    found = [
        (
            meta.names[cls],
            prob_rows[idx][cls],
            (box_array[idx].x, box_array[idx].y, box_array[idx].w, box_array[idx].h),
        )
        for idx in range(box_count)
        for cls in range(meta.classes)
        if prob_rows[idx][cls] > 0
    ]
    found.sort(key=lambda det: -det[1])

    # NOTE(review): only the probability rows are released here; box_array is
    # never handed to a free function - confirm whether make_boxes allocates
    # memory that must be freed on every call.
    dn.free_ptrs(dn.cast(prob_rows, dn.POINTER(dn.c_void_p)), box_count)
    return found
def isFall(w, h, ratio_threshold=1.1):
    """Decide whether a bounding box is wide enough to count as a fall.

    Args:
        w: Width of the bounding box.
        h: Height of the bounding box.
        ratio_threshold: Smallest width/height ratio that is reported as a
            fall. Defaults to 1.1.

    Returns:
        True when ``w / h`` is at least ``ratio_threshold``, else False.
        A box with zero height has no defined ratio and yields False
        instead of raising ZeroDivisionError.
    """
    if h == 0:
        return False
    return bool(float(w) / h >= ratio_threshold)
# Detector classes that are kept for the fall check. Names are bytes because
# they are read from the network metadata through ctypes.
TRACKED_CLASSES = (b'person', b'dog', b'cat', b'horse')


def _box_area(item):
    """Return width * height of a detection tuple ``(name, prob, (x, y, w, h))``."""
    return item[2][2] * item[2][3]


# Open the input video file.
input_movie = cv2.VideoCapture('cs4.mp4')
if not input_movie.isOpened():
    sys.exit("cannot open input video 'cs4.mp4'")
length = int(input_movie.get(cv2.CAP_PROP_FRAME_COUNT))

# Create an output movie file with the same frame rate and resolution as the
# input video.
fps = input_movie.get(cv2.CAP_PROP_FPS)
size = (int(input_movie.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(input_movie.get(cv2.CAP_PROP_FRAME_HEIGHT)))
output_movie = cv2.VideoWriter('out_cs4.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, size)

try:
    # Load network and weights.
    print("load network and weights")
    net = dn.load_net(b"./cfg/yolo.cfg", b"./yolo.weights", 0)
    meta = dn.load_meta(b"./cfg/coco.data")

    frame_number = 0
    while True:
        # Grab a single frame of video.
        ret, frame = input_movie.read()
        frame_number += 1

        # Quit when the input video file ends.
        if not ret:
            break

        # To detect on every second frame only, skip here with:
        #     if frame_number % 2 == 0: continue

        im = array_to_image(frame)
        # OpenCV delivers frames in BGR channel order; swap the first and
        # last channel in place so the network is fed RGB data.
        dn.rgbgr_image(im)

        start = time.time()
        r = detect(net, meta, im)
        print('the whole running time is: ' + str(time.time() - start))
        print(r)

        # Keep only the classes of interest.
        res = [item for item in r if item[0] in TRACKED_CLASSES]
        print('--------------')
        print(res)

        # Pick the detection with the largest rectangle. Boxes with no area
        # are ignored: there is nothing to draw and their width/height ratio
        # is undefined.
        result = max(res, key=_box_area, default=None)
        if result is not None and _box_area(result) > 0:
            # (x, y) is the box centre; convert to corner coordinates.
            x, y, w, h = result[2]
            left = int(x - w / 2)
            top = int(y - h / 2)
            right = int(x + w / 2)
            bottom = int(y + h / 2)

            if isFall(w, h):
                cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
                # Draw a warning label along the bottom edge of the box.
                cv2.rectangle(frame, (left, bottom - 25), (right, bottom), (0, 0, 255))
                font = cv2.FONT_HERSHEY_DUPLEX
                cv2.putText(frame, 'Warning!!!', (left + 6, bottom - 6), font, 0.5, (255, 0, 0), 1)
            else:
                cv2.rectangle(frame, (left, top), (right, bottom), (255, 0, 0), 2)

        # Display the result.
        cv2.imshow('Fall detection', frame)

        # Write the resulting image to the output video file.
        print("Writing frame {} / {}".format(frame_number, length))
        output_movie.write(frame)

        # Hit 'q' on the keyboard to quit!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # All done! Release the reader and the writer (the writer must be
    # released so the output file is finalised) and close the preview window.
    input_movie.release()
    output_movie.release()
    cv2.destroyAllWindows()
darknet.py
from ctypes import *
import math
import random
import time
import cv2
def sample(probs):
    """Draw an index at random, weighted by the entries of *probs*.

    The weights are normalised by their sum, a uniform number in [0, 1] is
    drawn, and the weights are subtracted from it one by one; the index at
    which the remainder drops to zero or below is returned. If that never
    happens, the last index is returned.
    """
    total = sum(probs)
    weights = [p / total for p in probs]
    remaining = random.uniform(0, 1)
    for index, weight in enumerate(weights):
        remaining = remaining - weight
        if remaining <= 0:
            return index
    return len(weights) - 1
def c_array(ctype, values):
    """Copy *values* into a newly created ctypes array of element type *ctype*.

    The array has exactly ``len(values)`` elements.
    """
    array_type = ctype * len(values)
    buffer = array_type()
    buffer[:] = values
    return buffer
class BOX(Structure):
    """ctypes structure for one bounding box: four C floats x, y, w, h."""

    # Field order matters: it defines the memory layout shared with the library.
    _fields_ = [(field_name, c_float) for field_name in ("x", "y", "w", "h")]
class IMAGE(Structure):
    """ctypes structure for an image: three C ints and a float pointer."""

    # Field order matters: it defines the memory layout shared with the library.
    _fields_ = [
        ("w", c_int),                  # width
        ("h", c_int),                  # height
        ("c", c_int),                  # number of channels
        ("data", POINTER(c_float)),    # pixel values as C floats
    ]
class METADATA(Structure):
    """ctypes structure holding a class count and a pointer to C strings."""

    # Field order matters: it defines the memory layout shared with the library.
    _fields_ = [
        ("classes", c_int),            # number of classes
        ("names", POINTER(c_char_p)),  # class names, indexed by class number
    ]
import os

# Location of the libdarknet.so shared library. The DARKNET_LIB environment
# variable overrides the built-in default, so the module also works on
# machines where the library lives somewhere else.
#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
_DEFAULT_LIB_PATH = "/home/ljj/share/tensorflow-yolov3-master/Fall-detection-master/libdarknet.so"
lib = CDLL(os.environ.get("DARKNET_LIB", _DEFAULT_LIB_PATH), RTLD_GLOBAL)

# ctypes prototypes for the library functions used from Python. argtypes and
# restype must describe the C signature; a function without an explicit
# restype is treated by ctypes as returning a C int.
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int

predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)

set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]

make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE

make_boxes = lib.make_boxes
make_boxes.argtypes = [c_void_p]
make_boxes.restype = POINTER(BOX)

free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]

num_boxes = lib.num_boxes
num_boxes.argtypes = [c_void_p]
num_boxes.restype = c_int

make_probs = lib.make_probs
make_probs.argtypes = [c_void_p]
make_probs.restype = POINTER(POINTER(c_float))

# NOTE: the former raw alias ``detect = lib.network_predict`` was removed on
# purpose. Attribute access on a CDLL returns the same function object every
# time, so giving that alias the detection argtypes silently replaced the
# signature of ``predict`` above. The name ``detect`` is defined as a Python
# function later in this module.

reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]

load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p

free_image = lib.free_image
free_image.argtypes = [IMAGE]

letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE

load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA

load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE

rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]

predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)

network_detect = lib.network_detect
network_detect.argtypes = [c_void_p, IMAGE, c_float, c_float, c_float, POINTER(BOX), POINTER(POINTER(c_float))]
def classify(net, meta, im):
    """Score image *im* for every class and rank the classes.

    Returns a list of ``(class_name, score)`` pairs, one per class in
    *meta*, ordered from the highest score to the lowest.
    """
    scores = predict_image(net, im)
    ranked = [(meta.names[idx], scores[idx]) for idx in range(meta.classes)]
    ranked.sort(key=lambda entry: -entry[1])
    return ranked
def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45):
    """Load an image file, run detection on it and list the detections.

    Args:
        net: Network handle as returned by ``load_net``.
        meta: ``METADATA`` as returned by ``load_meta``.
        image: Path of the image file, as ``bytes``.
        thresh, hier_thresh, nms: Passed through to ``network_detect``.

    Returns:
        A list of ``(class_name, probability, (x, y, w, h))`` tuples, one
        for every (box, class) pair whose probability is above zero,
        ordered from the highest probability to the lowest.
    """
    im = load_image(image, 0, 0)
    try:
        boxes = make_boxes(net)
        probs = make_probs(net)
        num = num_boxes(net)
        try:
            network_detect(net, im, thresh, hier_thresh, nms, boxes, probs)
            res = [
                (meta.names[i], probs[j][i],
                 (boxes[j].x, boxes[j].y, boxes[j].w, boxes[j].h))
                for j in range(num)
                for i in range(meta.classes)
                if probs[j][i] > 0
            ]
        finally:
            # Release the probability rows even if detection raised.
            # NOTE(review): boxes is never handed to a free function -
            # confirm whether make_boxes allocates memory that must be freed.
            free_ptrs(cast(probs, POINTER(c_void_p)), num)
    finally:
        # The loaded image is always released.
        free_image(im)
    return sorted(res, key=lambda x: -x[1])
if __name__ == "__main__":
    # Demo: run detection on one image and print every detection.
    #
    # Classification example, kept for reference:
    #net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0)
    #im = load_image("data/wolf.jpg", 0, 0)
    #meta = load_meta("cfg/imagenet1k.data")
    #r = classify(net, meta, im)
    #print r[:10]
    # net = load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0)
    net = load_net(b"../cfg/yolo.cfg", b"../yolo.weights", 0)
    meta = load_meta(b"../cfg/coco.data")

    start = time.time()
    # input_movie = cv.VideoCapture('Video1.avi')
    r = detect(net, meta, b"../data/cs1.png")
    print('the whole running time is: ' + str(time.time() - start))
    print(r)

    # Class names are read through c_char_p and therefore arrive as bytes on
    # Python 3; comparing them with str literals would never match.
    wanted = (b'person', b'dog')
    res = []
    for item in r:
        if item[0] in wanted:
            res.append(item)
        # item is (class_name, probability, (x, y, w, h)).
        print(item)
        print(item[0])
        print(item[1])
        print(item[2][0])
        print(item[2][1])
        print(item[2][2])
        print(item[2][3])