基于python的行人与车辆检测和跟踪实现(HOG+SVM/HAAR)

如果想直接看代码请跳转至‘我的代码’

  • 基于python的行人与车辆检测和跟踪实现(HOG+SVM/HAAR)
    • 前言
    • 实现思想
      • 行人检测:
      • 车辆检测
      • 目标跟踪
      • 流程图
      • 参考代码
      • 我的代码

基于python的行人与车辆检测和跟踪实现(HOG+SVM/HAAR)

前言

身在国外正在做毕设,期间看到了很多国内外人士写的代码。由衷的感谢各位开源大神提供的资源,让我能自学入门Computer Vistion领域。此文不仅仅是为了记录自己的学习过程,同时也想将自己更改后的代码分享出去,希望能帮助到更多对计算机视觉,机器学习感兴趣的人。同时也欢迎各位大神对我进行指导与指正!

实现思想

行人检测:

HOG特征子 + 线性SVM (支持向量机 Support Vector Machine)分类器:

基本实现思路遵循2005年Dala 在CVPR上发表的文章, 链接如下: 2005 Dalal CVPR.

本人是在大三下学期开始自学SVM原理的,当时从网上,书籍中找了很多资料,这篇博文讲的算是很详细了,能够满足我对SVM数学原理的好奇心。当然,SVM和神经网络系统中简单的PLA感知机(Perceptrons Learning Algorithm)算法之间的相通与不通之处,并不是本文的重点。我将在后续的学习中写出来作为学习的记录。
有关HOG特征子的原理详解博客上有很多,在这里本人就不再赘述;有关SVM的数学推导,感兴趣的同学可以看一看这篇博文:理解SVM的三层境界

车辆检测

Haar-like特征 + Adaboost分类器做级联检测
原本我是想用HOG +SVM 来实现,但是发现一个问题就是,HOG + SVM 貌似不能很好的解决车辆遮挡问题,所以导致效果并没有想象中的那么好。而网上有人用Haar做出的效果反而比HOG特征好很多。

有关Haar级联分类器的原理现在还没弄清楚,我将在后续进行整理。

目标跟踪

用的是python 工具库dlib中带有的correlation_tracker.这个跟踪方法属于单目标跟踪, 原理为Danelljan等人的2014年论文《Accurate Scale Estimation for Robust Visual Tracking》实现。关联跟踪器。

流程图

Created with Raphaël 2.2.0 打开视频 行人/车辆检测(包括一些错误处理) 视频帧数%10 != 0?(%:取余) 目标跟踪 存储视频 yes no

参考代码

车辆检测Haar链接 github
HOG + SVM的车辆训练方式我在 github 上找到了一个.xml文件,调用起来方便很多。链接暂时找不到了,稍后我上传一波文件。总之经过比较后发现还是Haar-like的效果好。

我的代码

speed_check.py

import cv2
import dlib
import time
import threading
import numpy as np
# from imutils.object_detection import non_max_suppression
import math

carCascade = cv2.CascadeClassifier('myhaar.xml')
video = cv2.VideoCapture('project.mp4')
#(H, W) = frame.shape[:2]
# WIDTH = 1280
# HEIGHT = 720

def estimateSpeed(location1, location2):
   d_pixels = math.sqrt(math.pow(location2[0] - location1[0], 2) + math.pow(location2[1] - location1[1], 2))
   # ppm = location2[2] / carWidht
   ppm = 8.8
   d_meters = d_pixels / ppm
   #print("d_pixels=" + str(d_pixels), "d_meters=" + str(d_meters))
   fps = 18
   speed = d_meters * fps * 3.6
   return speed
   

def trackMultipleObjects():
   # out = None
   counter_down = []
   counter_up = []
   line_down = np.array([[100, 360], [800, 360]])
   line_up = np.array([[965, 425], [1280, 310]])
   line_pass_left = np.array([[115, 200], [115, 320]])
   line_pass_right = np.array([[930, 200], [930, 320]])
   mask_vehicle_and_people = np.array([[line_down[0],line_down[1],[940,720],[0,720]],
   						 [line_up[0], line_up[1], [1280, 720], [1260, 720]],
   						 [line_pass_left[0], line_pass_left[1], line_pass_right[1], line_pass_right[0]]])

   rectangleColor = (0, 255, 0)
   frameCounter = 0
   currentCarID = 0
   fps = 0
   
   carTracker = {}
   carNumbers = {}
   carLocation1 = {}
   carLocation2 = {}
   speed = [None] * 1000

   HEIGHT = 720
   WIDTH = 1280

   EXIT_COLOR0 = (6, 43, 42)
   # Write output to video file
   out = cv2.VideoWriter('project_output_haar1.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 10, (WIDTH, HEIGHT))


   while True:
   	start_time = time.time()
   	rc, image = video.read()

   	if type(image) == type(None):
   		break
   	
   	image = cv2.resize(image, (WIDTH, HEIGHT))
   	resultImage = image.copy()
   	frameCounter = frameCounter + 1
   	
   	carIDtoDelete = []

   	for carID in carTracker.keys():
   		trackingQuality = carTracker[carID].update(image)
   		
   		if trackingQuality < 7:
   			carIDtoDelete.append(carID)
   			
   	for carID in carIDtoDelete:
   		print ('Removing carID ' + str(carID) + ' from list of trackers.')
   		print ('Removing carID ' + str(carID) + ' previous location.')
   		print ('Removing carID ' + str(carID) + ' current location.')
   		carTracker.pop(carID, None)
   		carLocation1.pop(carID, None)
   		carLocation2.pop(carID, None)
   	# detecting
   	if not (frameCounter % 10):
   		gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
   		cars = carCascade.detectMultiScale(gray, 1.1, 13, 18, (24, 24))
   		# cars, weight = carCascade.detectMultiScale(image, winStride=(4, 4),
   		# 					 padding=(8, 8), scale=1.05)
   		for (_x, _y, _w, _h) in cars:
   			x = int(_x)
   			y = int(_y)
   			w = int(_w)
   			h = int(_h)
   		
   			x_bar = x + 0.5 * w
   			y_bar = y + 0.5 * h
   			
   			matchCarID = None
   		
   			for carID in carTracker.keys():
   				trackedPosition = carTracker[carID].get_position()
   				
   				t_x = int(trackedPosition.left())
   				t_y = int(trackedPosition.top())
   				t_w = int(trackedPosition.width())
   				t_h = int(trackedPosition.height())

   				t_x_bar = t_x + 0.5 * t_w
   				t_y_bar = t_y + 0.5 * t_h
   			
   				if ((t_x <= x_bar <= (t_x + t_w)) and (t_y <= y_bar <= (t_y + t_h)) and (x <= t_x_bar <= (x + w)) and (y <= t_y_bar <= (y + h))):
   					matchCarID = carID
   		# if new car exists
   			if matchCarID is None:
   				print ('Creating new tracker ' + str(currentCarID))
   				
   				tracker = dlib.correlation_tracker()
   				tracker.start_track(image, dlib.rectangle(x , y , x + w , y + h ))
   				
   				carTracker[currentCarID] = tracker
   				carLocation1[currentCarID] = [x , y , x + w , y + h ]

   				currentCarID = currentCarID + 1
   	
   	# cv2.line(resultImage,line_down[0],line_down[1],(255,0,0),5)
   	# cv2.line(resultImage, line_up[0], line_up[1], (255, 0, 0), 5)

   	base = np.zeros((HEIGHT, WIDTH) + (3,), dtype='uint8')
   	exit_masks = [cv2.fillPoly(base, mask_vehicle_and_people, (255, 255, 255))[:, :, 0]]
   	# print(exit_masks)
   	for exit_mask in exit_masks:
   		_img = np.zeros(resultImage.shape, resultImage.dtype)
   		_img[:, :] = EXIT_COLOR0
   		mask = cv2.bitwise_and(_img, _img, mask=exit_mask)
   		cv2.addWeighted(mask, 1, resultImage, 1, 0, resultImage)

   	# tracking

   	for carID in carTracker.keys():
   		trackedPosition = carTracker[carID].get_position()
   				
   		t_x = int(trackedPosition.left())
   		t_y = int(trackedPosition.top())
   		t_w = int(trackedPosition.width())
   		t_h = int(trackedPosition.height())


   		cv2.rectangle(resultImage, (t_x, t_y), (t_x + t_w, t_y + t_h), rectangleColor, 3)
   		cv2.putText(resultImage, "V", (t_x, t_y - 10),
   					cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 255, 0), 2)
   		center_x = t_x + t_w//2
   		center_y = t_y + t_h//2

   		if (line_down[0][0] <= center_x <= line_down[1][0]) and (center_y >= line_down[0][1]):
   			counter_down.append(carID)
   		if (line_up[0][0] <= center_x <= line_up[1][0]) and (line_up[1][1] <= center_y <= line_up[0][1]):
   			counter_up.append(carID)

   		# speed estimation
   		carLocation2[carID] = [t_x, t_y, t_w, t_h]
   	info = [
   		("Vehicle Up", len(set(counter_up))),
   		("Vehicle Down", len(set(counter_down))),
   		]
   	for (i, (k, v)) in enumerate(info):
   		text = "{}: {}".format(k, v)
   		cv2.putText(resultImage, text, (10, HEIGHT  - ((i * 20) + 20)),
   					cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
   	end_time = time.time()
   	
   	if not (end_time == start_time):
   		fps = 1.0/(end_time - start_time)
   	
   	#cv2.putText(resultImage, 'FPS: ' + str(int(fps)), (620, 30),cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)


   	for i in carLocation1.keys():	
   		if frameCounter % 1 == 0:
   			[x1, y1, w1, h1] = carLocation1[i]
   			[x2, y2, w2, h2] = carLocation2[i]
   	
   			# print 'previous location: ' + str(carLocation1[i]) + ', current location: ' + str(carLocation2[i])
   			carLocation1[i] = [x2, y2, w2, h2]
   	
   			# print 'new previous location: ' + str(carLocation1[i])
   			if [x1, y1, w1, h1] != [x2, y2, w2, h2]:
   				if (speed[i] == None or speed[i] == 0) and y1 >= 255 and y1 <= 285:
   					speed[i] = estimateSpeed([x1, y1, w1, h1], [x2, y2, w2, h2])
   				if speed[i] is None:
   					continue
   				#if y1 > 275 and y1 < 285:
   				if (speed[i] >= 15) and (y1 >= 180):
   					cv2.putText(resultImage, str(int(speed[i])) + " km/hr", (int(x1 + w1/2), int(y1-5)),cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 255, 255), 2)
   				elif speed[i] < 15:
   					cv2.putText(resultImage, " not a car", (int(x1 + w1 / 2), int(y1 - 5)),
   							   cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
   					cv2.rectangle(resultImage, (x2, y2), (x2 + w2, y2 + h2), (0, 0, 255), 4)
   					cv2.putText(resultImage, "V", (x2, y2 - 10),
   								cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 0, 255), 2)
   				#print ('CarID ' + str(i) + ': speed is ' + str("%.2f" % round(speed[i], 0)) + ' km/h.\n')

   				#else:
   				#	cv2.putText(resultImage, "Far Object", (int(x1 + w1/2), int(y1)),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

   					#print ('CarID ' + str(i) + ' Location1: ' + str(carLocation1[i]) + ' Location2: ' + str(carLocation2[i]) + ' speed is ' + str("%.2f" % round(speed[i], 0)) + ' km/h.\n')


   	cv2.imshow('result', resultImage)
   	# Write the frame into the file 'output.avi'
   	#out.write(resultImage)

   	out.write(resultImage)


   	if cv2.waitKey(33) == 27:
   		break
   # # check to see if we need to release the video writer pointer
   # if out is not None:
   # 	out.release()
   #
   # # release the video file pointer
   # video.release()
   cv2.destroyAllWindows()

if __name__ == '__main__':
   trackMultipleObjects()

people_tracking.py

import cv2
import dlib
import time
import threading
import numpy as np
# from imutils.object_detection import non_max_suppression
import math

hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
# carCascade = cv2.CascadeClassifier('myhaar.xml')
video = cv2.VideoCapture('project_output_haar1.avi')

def estimateSpeed(location1, location2):
   d_pixels = math.sqrt(math.pow(location2[0] - location1[0], 2) + math.pow(location2[1] - location1[1], 2))
   # ppm = location2[2] / carWidht
   ppm = 8.8
   d_meters = d_pixels / ppm
   # print("d_pixels=" + str(d_pixels), "d_meters=" + str(d_meters))
   fps = 18
   speed = d_meters * fps * 3.6
   return speed


def trackMultipleObjects():
   out = None
   counter_right = []
   counter_moto = []
   counter_wrong = []
   counter_wrong_detect = []
   rects = []

   line_pass_left = np.array([[115,200],[115,320]])
   line_pass_right = np.array([[930, 200], [930, 320]])
   mask_people = np.array([line_pass_left[0], line_pass_left[1], line_pass_right[1], line_pass_right[0]])

   # line_down = [(0, 360), (800, 360)]
   # rectangleColor = (0, 255, 0)
   frameCounter = 0
   currentCarID = 0
   fps = 0

   carTracker = {}
   carNumbers = {}
   carLocation1 = {}
   carLocation2 = {}
   speed = [None] * 1000

   HEIGHT = 720
   WIDTH = 1280
   # EXIT_COLOR = (66, 183, 42)
   # Write output to video file
   out = cv2.VideoWriter('project_output_haar_and_svm1.avi', cv2.VideoWriter_fourcc('m','p','4','v'), 10, (WIDTH, HEIGHT))

   while True:
       start_time = time.time()
       rc, image = video.read()

       if type(image) == type(None):
           break

       image = cv2.resize(image, (WIDTH, HEIGHT))
       resultImage = image.copy()
       frameCounter = frameCounter + 1

       carIDtoDelete = []

       for carID in carTracker.keys():
           trackingQuality = carTracker[carID].update(image)

           if trackingQuality < 7:
               carIDtoDelete.append(carID)

       for carID in carIDtoDelete:
           print ('Removing carID ' + str(carID) + ' from list of trackers.')
           print ('Removing carID ' + str(carID) + ' previous location.')
           print ('Removing carID ' + str(carID) + ' current location.')
           carTracker.pop(carID, None)
           carLocation1.pop(carID, None)
           carLocation2.pop(carID, None)
       # detecting
       if not (frameCounter % 10):
           #gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
           (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
                                                   padding=(8, 8), scale=1.05)

           for (_x, _y, _w, _h) in rects:
               x = int(_x)
               y = int(_y)
               w = int(_w)
               h = int(_h)

               x_bar = x + 0.5 * w
               y_bar = y + 0.5 * h

               matchCarID = None

               for carID in carTracker.keys():
                   trackedPosition = carTracker[carID].get_position()

                   t_x = int(trackedPosition.left())
                   t_y = int(trackedPosition.top())
                   t_w = int(trackedPosition.width())
                   t_h = int(trackedPosition.height())

                   t_x_bar = t_x + 0.5 * t_w
                   t_y_bar = t_y + 0.5 * t_h

                   if ((t_x <= x_bar <= (t_x + t_w)) and (t_y <= y_bar <= (t_y + t_h)) and (
                           x <= t_x_bar <= (x + w)) and (y <= t_y_bar <= (y + h))):
                       matchCarID = carID
               # if new car exists
               if matchCarID is None:
                   print ('Creating new tracker ' + str(currentCarID))

                   tracker = dlib.correlation_tracker()
                   tracker.start_track(image, dlib.rectangle(x, y, x + w, y + h))

                   carTracker[currentCarID] = tracker
                   carLocation1[currentCarID] = [x, y, x + w, y + h]

                   currentCarID = currentCarID + 1

       # cv2.line(resultImage, line_pass_right[0], line_pass_right[1], (255, 255, 0), 5)
       # cv2.line(resultImage, line_pass_left[0], line_pass_left[1], (255, 255, 0), 5)
       # cv2.line(resultImage, line_down[0], line_down[1], (255, 0, 0), 5)
       #
       # tracking

       for carID in carTracker.keys():
           trackedPosition = carTracker[carID].get_position()

           t_x = int(trackedPosition.left())
           t_y = int(trackedPosition.top())
           t_w = int(trackedPosition.width())
           t_h = int(trackedPosition.height())
           center_x = t_x + t_w // 2
           center_y = t_y + t_h // 2
           # rects.append((t_x, t_y, t_w, t_h))
           # cv2.circle(resultImage, (center_x, center_y ),4, (0, 255, 255), -1)

           """
           if ((line_pass_left[0][0] <= center_x <= line_pass_left[1][0] + 40) and ( line_pass_left[0][1] <= center_y <= line_pass_left[1][1])) or ((line_pass_right[0][0] - 10 <= center_x <= line_pass_right[1][0]) and ( line_pass_right[0][1] <= center_y <= line_pass_right[1][1])):
               counter_right.append(carID)
               cv2.putText(resultImage, "Pedestrian", ( center_x - 10, center_y - 10),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)
           elif (line_down[0][0] <= center_x <= line_down[1][0]) and (line_down[0][1] - 20 <= center_y <= line_down[0][1] + 20):
               counter_moto.append(carID)
               cv2.putText(resultImage, "moto", (center_x - 10, center_y - 10),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 250, 250), 2)
           
           else:
               counter_wrong.append(carID)
               cv2.putText(resultImage, "Pedestrian", (center_x - 10, center_y - 10),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
           # speed estimation
           carLocation2[carID] = [t_x, t_y, t_w, t_h]
       info = [
           ("walking in right place", len(set(counter_right))),
           ("moto", len(set(counter_moto))),
           
           ("walking in wrong place", len(set(counter_wrong))),
       ]
       for (i, (k, v)) in enumerate(info):
           text = "{}: {} people".format(k, v)
           cv2.putText(resultImage, text, (670, HEIGHT - ((i * 20) + 20)),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
       """
           carLocation2[carID] = [t_x, t_y, t_w, t_h]
       end_time = time.time()

       if not (end_time == start_time):
           fps = 1.0 / (end_time - start_time)

       # cv2.putText(resultImage, 'FPS: ' + str(int(fps)), (620, 30),cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)

       for i in carLocation1.keys():
           if frameCounter % 1 == 0:
               [x1, y1, w1, h1] = carLocation1[i]
               [x2, y2, w2, h2] = carLocation2[i]

               # print 'previous location: ' + str(carLocation1[i]) + ', current location: ' + str(carLocation2[i])
               carLocation1[i] = [x2, y2, w2, h2]

               # print 'new previous location: ' + str(carLocation1[i])
               if [x1, y1, w1, h1] != [x2, y2, w2, h2]:
                   if (speed[i] == None or speed[i] == 0):
                       speed[i] = estimateSpeed([x1, y1, w1, h1], [x2, y2, w2, h2])

                   # if y1 > 275 and y1 < 285:
                   if (33 <= speed[i] <= 45) and y1 >= 100:
                       cv2.circle(resultImage, (x1 + w1 // 2, y1 + h1 // 2), 4, (0, 255, 255), -1)
                       cv2.putText(resultImage, "P:" + str(int(speed[i])) + " km/hr", (int(x1 + w1 / 2), int(y1 - 5)),
                                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)
                       counter_right.append(i)
                       if speed[i] > 45:
                           cv2.circle(resultImage, (x1 + w1 // 2, y1 + h1 // 2), 4, (0, 255, 255), -1)
                           cv2.putText(resultImage, "Moto" + str(int(speed[i])) + "km/hr", (int(x1 + w1 / 2), int(y1 - 5)),
                                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2)
                           counter_moto.append(i)


               info = [
                   ("walking in right place", len(set(counter_right))),
                   ("moto", len(set(counter_moto)))
               ]
               for (i, (k, v)) in enumerate(info):
                   text = "{}: {} people".format(k, v)
                   cv2.putText(resultImage, text, (670, HEIGHT - ((i * 20) + 20)),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
               # print ('CarID ' + str(i) + ': speed is ' + str("%.2f" % round(speed[i], 0)) + ' km/h.\n')

               # else:
               #	cv2.putText(resultImage, "Far Object", (int(x1 + w1/2), int(y1)),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

               # print ('CarID ' + str(i) + ' Location1: ' + str(carLocation1[i]) + ' Location2: ' + str(carLocation2[i]) + ' speed is ' + str("%.2f" % round(speed[i], 0)) + ' km/h.\n')

       cv2.imshow('result', resultImage)
       # Write the frame into the file 'output.avi'
       # out.write(resultImage)

       out.write(resultImage)

       if cv2.waitKey(33) == 27:
           break
   # # check to see if we need to release the video writer pointer
   # if out is not None:
   # 	out.release()
   #
   # # release the video file pointer
   # video.release()
   cv2.destroyAllWindows()


if __name__ == '__main__':
   trackMultipleObjects()

我用的视频是从youtube上面截取下来的。点击查看
不知道国内的同学能不能打开。
后续我将会把我的代码文件打包上传,如果大家在代码理解上有什么问题,欢迎在留言区留言我们一起讨论!

你可能感兴趣的:(计算机视觉)