I'm currently abroad working on my graduation project, and along the way I've read a great deal of code written by people both in China and elsewhere. My sincere thanks to all the open-source authors whose resources let me teach myself the basics of computer vision. This post is not only a record of my own learning; I also want to share my modified code, in the hope that it helps more people who are interested in computer vision and machine learning. Guidance and corrections from more experienced readers are very welcome!
HOG features + linear SVM (Support Vector Machine) classifier:
The basic implementation follows Dalal's 2005 CVPR paper, linked here: 2005 Dalal CVPR.
I started teaching myself SVM theory in the second semester of my junior year, collecting material from the web and from books at the time. The post linked below is quite thorough and satisfied my curiosity about the mathematics behind SVMs. The similarities and differences between SVMs and the simple Perceptron Learning Algorithm (PLA) found in neural networks are not the focus of this article; I will write them up later as part of my study notes.
There are already plenty of blog posts explaining how HOG features work, so I won't repeat them here. For the mathematical derivation of SVMs, interested readers can look at this post: 理解SVM的三层境界 (Understanding SVM at Three Levels).
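As a quick taste of what this pipeline looks like in practice, here is a minimal sketch using OpenCV's built-in HOG + linear SVM pedestrian model (the same detector is reused in people_tracking.py further down; 'frame.jpg' is a placeholder input path):

import cv2

# minimal HOG + linear SVM detection sketch (OpenCV's built-in people model)
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

frame = cv2.imread('frame.jpg')  # placeholder image path
rects, weights = hog.detectMultiScale(frame, winStride=(4, 4),
                                      padding=(8, 8), scale=1.05)
for (x, y, w, h) in rects:
    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imwrite('frame_detected.jpg', frame)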
Haar-like features + AdaBoost classifier as a cascade detector:
I originally planned to do everything with HOG + SVM, but found that it does not cope well with partially occluded vehicles, so the results were not as good as I had hoped. Meanwhile, people online have reported noticeably better results on this task with Haar features than with HOG.
I haven't fully worked out the theory behind the Haar cascade classifier yet; I will write it up in a later post.
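For reference, running a trained Haar cascade in OpenCV takes only a few lines. Below is a minimal sketch using the same cascade file and parameters as the full script further down ('frame.jpg' is again a placeholder):

import cv2

carCascade = cv2.CascadeClassifier('myhaar.xml')  # trained Haar vehicle cascade
gray = cv2.cvtColor(cv2.imread('frame.jpg'), cv2.COLOR_BGR2GRAY)
cars = carCascade.detectMultiScale(gray, scaleFactor=1.1,
                                   minNeighbors=13, minSize=(24, 24))
for (x, y, w, h) in cars:
    print('car at', x, y, w, h)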
For tracking I use the correlation_tracker included in the Python library dlib. It is a single-object tracker that implements Danelljan et al.'s 2014 paper "Accurate Scale Estimation for Robust Visual Tracking".
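Its basic usage is just three calls: start_track on a detection box, update on each new frame, and get_position to read the tracked box back. A minimal sketch (the initial box coordinates here are illustrative, not taken from the video):

import cv2
import dlib

cap = cv2.VideoCapture('project.mp4')
ok, frame1 = cap.read()
ok, frame2 = cap.read()

tracker = dlib.correlation_tracker()
# initialise on a detection box (illustrative coordinates)
tracker.start_track(frame1, dlib.rectangle(100, 200, 180, 260))
quality = tracker.update(frame2)  # confidence score; the script below drops trackers < 7
pos = tracker.get_position()      # drectangle: .left()/.top()/.width()/.height()
print(quality, pos.left(), pos.top())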
Haar vehicle detection link: github
For HOG + SVM vehicle detection I found a trained .xml file on github, which makes it much easier to use. I can't find the link at the moment; I will upload the file later. In any case, after comparing the two approaches, the Haar-like features still give the better results.
speed_check.py
import cv2
import dlib
import time
import threading
import numpy as np
# from imutils.object_detection import non_max_suppression
import math
carCascade = cv2.CascadeClassifier('myhaar.xml')  # pre-trained Haar vehicle cascade
video = cv2.VideoCapture('project.mp4')  # input video clip
#(H, W) = frame.shape[:2]
# WIDTH = 1280
# HEIGHT = 720
def estimateSpeed(location1, location2):
    d_pixels = math.sqrt(math.pow(location2[0] - location1[0], 2) + math.pow(location2[1] - location1[1], 2))
    # ppm = location2[2] / carWidth
    ppm = 8.8  # hard-coded pixels-per-meter calibration for this video
    d_meters = d_pixels / ppm
    # print("d_pixels=" + str(d_pixels), "d_meters=" + str(d_meters))
    fps = 18  # assumed frame rate of the clip
    speed = d_meters * fps * 3.6  # m/frame -> m/s -> km/h
return speed
def trackMultipleObjects():
# out = None
counter_down = []
counter_up = []
line_down = np.array([[100, 360], [800, 360]])
line_up = np.array([[965, 425], [1280, 310]])
line_pass_left = np.array([[115, 200], [115, 320]])
line_pass_right = np.array([[930, 200], [930, 320]])
    mask_vehicle_and_people = np.array([[line_down[0], line_down[1], [940, 720], [0, 720]],
                                        [line_up[0], line_up[1], [1280, 720], [1260, 720]],
                                        [line_pass_left[0], line_pass_left[1], line_pass_right[1], line_pass_right[0]]],
                                       dtype=np.int32)  # cv2.fillPoly wants 32-bit integer points
rectangleColor = (0, 255, 0)
frameCounter = 0
currentCarID = 0
fps = 0
carTracker = {}
carNumbers = {}
carLocation1 = {}
carLocation2 = {}
speed = [None] * 1000
HEIGHT = 720
WIDTH = 1280
EXIT_COLOR0 = (6, 43, 42)
# Write output to video file
out = cv2.VideoWriter('project_output_haar1.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 10, (WIDTH, HEIGHT))
while True:
start_time = time.time()
rc, image = video.read()
        if image is None:
            break
image = cv2.resize(image, (WIDTH, HEIGHT))
resultImage = image.copy()
frameCounter = frameCounter + 1
carIDtoDelete = []
for carID in carTracker.keys():
trackingQuality = carTracker[carID].update(image)
if trackingQuality < 7:
carIDtoDelete.append(carID)
for carID in carIDtoDelete:
print ('Removing carID ' + str(carID) + ' from list of trackers.')
print ('Removing carID ' + str(carID) + ' previous location.')
print ('Removing carID ' + str(carID) + ' current location.')
carTracker.pop(carID, None)
carLocation1.pop(carID, None)
carLocation2.pop(carID, None)
        # detection: run the Haar cascade every 10th frame; trackers fill the gaps between detections
        if not (frameCounter % 10):
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            # scaleFactor / minNeighbors / minSize tuned for this clip
            cars = carCascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=13, minSize=(24, 24))
# cars, weight = carCascade.detectMultiScale(image, winStride=(4, 4),
# padding=(8, 8), scale=1.05)
for (_x, _y, _w, _h) in cars:
x = int(_x)
y = int(_y)
w = int(_w)
h = int(_h)
x_bar = x + 0.5 * w
y_bar = y + 0.5 * h
matchCarID = None
for carID in carTracker.keys():
trackedPosition = carTracker[carID].get_position()
t_x = int(trackedPosition.left())
t_y = int(trackedPosition.top())
t_w = int(trackedPosition.width())
t_h = int(trackedPosition.height())
t_x_bar = t_x + 0.5 * t_w
t_y_bar = t_y + 0.5 * t_h
                    # treat detection and tracker as the same object if each
                    # box's centre falls inside the other box
                    if ((t_x <= x_bar <= (t_x + t_w)) and (t_y <= y_bar <= (t_y + t_h)) and (x <= t_x_bar <= (x + w)) and (y <= t_y_bar <= (y + h))):
                        matchCarID = carID
# if new car exists
if matchCarID is None:
print ('Creating new tracker ' + str(currentCarID))
tracker = dlib.correlation_tracker()
tracker.start_track(image, dlib.rectangle(x , y , x + w , y + h ))
carTracker[currentCarID] = tracker
carLocation1[currentCarID] = [x , y , x + w , y + h ]
currentCarID = currentCarID + 1
# cv2.line(resultImage,line_down[0],line_down[1],(255,0,0),5)
# cv2.line(resultImage, line_up[0], line_up[1], (255, 0, 0), 5)
        # overlay the counting zones: rasterize the polygons into a binary mask,
        # paint a solid color through it, then blend it onto the output frame
        base = np.zeros((HEIGHT, WIDTH, 3), dtype='uint8')
        exit_masks = [cv2.fillPoly(base, mask_vehicle_and_people, (255, 255, 255))[:, :, 0]]
# print(exit_masks)
for exit_mask in exit_masks:
_img = np.zeros(resultImage.shape, resultImage.dtype)
_img[:, :] = EXIT_COLOR0
mask = cv2.bitwise_and(_img, _img, mask=exit_mask)
cv2.addWeighted(mask, 1, resultImage, 1, 0, resultImage)
# tracking
for carID in carTracker.keys():
trackedPosition = carTracker[carID].get_position()
t_x = int(trackedPosition.left())
t_y = int(trackedPosition.top())
t_w = int(trackedPosition.width())
t_h = int(trackedPosition.height())
cv2.rectangle(resultImage, (t_x, t_y), (t_x + t_w, t_y + t_h), rectangleColor, 3)
cv2.putText(resultImage, "V", (t_x, t_y - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 255, 0), 2)
center_x = t_x + t_w//2
center_y = t_y + t_h//2
if (line_down[0][0] <= center_x <= line_down[1][0]) and (center_y >= line_down[0][1]):
counter_down.append(carID)
if (line_up[0][0] <= center_x <= line_up[1][0]) and (line_up[1][1] <= center_y <= line_up[0][1]):
counter_up.append(carID)
# speed estimation
carLocation2[carID] = [t_x, t_y, t_w, t_h]
info = [
("Vehicle Up", len(set(counter_up))),
("Vehicle Down", len(set(counter_down))),
]
for (i, (k, v)) in enumerate(info):
text = "{}: {}".format(k, v)
cv2.putText(resultImage, text, (10, HEIGHT - ((i * 20) + 20)),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
end_time = time.time()
if not (end_time == start_time):
fps = 1.0/(end_time - start_time)
#cv2.putText(resultImage, 'FPS: ' + str(int(fps)), (620, 30),cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
for i in carLocation1.keys():
            if frameCounter % 1 == 0:  # every frame; raise the modulus to sample less often
[x1, y1, w1, h1] = carLocation1[i]
[x2, y2, w2, h2] = carLocation2[i]
# print 'previous location: ' + str(carLocation1[i]) + ', current location: ' + str(carLocation2[i])
carLocation1[i] = [x2, y2, w2, h2]
# print 'new previous location: ' + str(carLocation1[i])
if [x1, y1, w1, h1] != [x2, y2, w2, h2]:
                    if (speed[i] is None or speed[i] == 0) and 255 <= y1 <= 285:
speed[i] = estimateSpeed([x1, y1, w1, h1], [x2, y2, w2, h2])
if speed[i] is None:
continue
#if y1 > 275 and y1 < 285:
if (speed[i] >= 15) and (y1 >= 180):
cv2.putText(resultImage, str(int(speed[i])) + " km/hr", (int(x1 + w1/2), int(y1-5)),cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 255, 255), 2)
elif speed[i] < 15:
cv2.putText(resultImage, " not a car", (int(x1 + w1 / 2), int(y1 - 5)),
cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
cv2.rectangle(resultImage, (x2, y2), (x2 + w2, y2 + h2), (0, 0, 255), 4)
cv2.putText(resultImage, "V", (x2, y2 - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 0, 255), 2)
#print ('CarID ' + str(i) + ': speed is ' + str("%.2f" % round(speed[i], 0)) + ' km/h.\n')
#else:
# cv2.putText(resultImage, "Far Object", (int(x1 + w1/2), int(y1)),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
#print ('CarID ' + str(i) + ' Location1: ' + str(carLocation1[i]) + ' Location2: ' + str(carLocation2[i]) + ' speed is ' + str("%.2f" % round(speed[i], 0)) + ' km/h.\n')
cv2.imshow('result', resultImage)
# Write the frame into the file 'output.avi'
#out.write(resultImage)
out.write(resultImage)
if cv2.waitKey(33) == 27:
break
    # flush the writer and release the video pointers
    out.release()
    video.release()
    cv2.destroyAllWindows()
if __name__ == '__main__':
trackMultipleObjects()
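A quick sanity check on the calibration hard-coded in estimateSpeed: with ppm = 8.8 pixels per meter and an assumed 18 fps, a centre point that moves 4.4 px between consecutive frames has covered 4.4 / 8.8 = 0.5 m in 1/18 s, which is 0.5 * 18 = 9 m/s, i.e. 9 * 3.6 = 32.4 km/h. Both ppm and fps are guesses for this particular clip, so the reported speeds are indicative only. For example:

# quick numeric check of estimateSpeed (run with speed_check.py importable)
from speed_check import estimateSpeed

# 4.4 px / 8.8 ppm = 0.5 m per frame; 0.5 * 18 fps * 3.6 = 32.4 km/h
print(estimateSpeed([0, 0, 0, 0], [4.4, 0, 0, 0]))  # -> 32.4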
people_tracking.py
import cv2
import dlib
import time
import threading
import numpy as np
# from imutils.object_detection import non_max_suppression
import math
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())  # built-in pedestrian SVM
# carCascade = cv2.CascadeClassifier('myhaar.xml')
video = cv2.VideoCapture('project_output_haar1.avi')  # output of speed_check.py
def estimateSpeed(location1, location2):
    d_pixels = math.sqrt(math.pow(location2[0] - location1[0], 2) + math.pow(location2[1] - location1[1], 2))
    # ppm = location2[2] / carWidth
    ppm = 8.8  # hard-coded pixels-per-meter calibration for this video
    d_meters = d_pixels / ppm
    # print("d_pixels=" + str(d_pixels), "d_meters=" + str(d_meters))
    fps = 18  # assumed frame rate of the clip
    speed = d_meters * fps * 3.6  # m/frame -> m/s -> km/h
return speed
def trackMultipleObjects():
out = None
counter_right = []
counter_moto = []
counter_wrong = []
counter_wrong_detect = []
rects = []
line_pass_left = np.array([[115,200],[115,320]])
line_pass_right = np.array([[930, 200], [930, 320]])
mask_people = np.array([line_pass_left[0], line_pass_left[1], line_pass_right[1], line_pass_right[0]])
# line_down = [(0, 360), (800, 360)]
# rectangleColor = (0, 255, 0)
frameCounter = 0
currentCarID = 0
fps = 0
carTracker = {}
carNumbers = {}
carLocation1 = {}
carLocation2 = {}
speed = [None] * 1000
HEIGHT = 720
WIDTH = 1280
# EXIT_COLOR = (66, 183, 42)
# Write output to video file
out = cv2.VideoWriter('project_output_haar_and_svm1.avi', cv2.VideoWriter_fourcc('m','p','4','v'), 10, (WIDTH, HEIGHT))
while True:
start_time = time.time()
rc, image = video.read()
        if image is None:
            break
image = cv2.resize(image, (WIDTH, HEIGHT))
resultImage = image.copy()
frameCounter = frameCounter + 1
carIDtoDelete = []
for carID in carTracker.keys():
trackingQuality = carTracker[carID].update(image)
if trackingQuality < 7:
carIDtoDelete.append(carID)
for carID in carIDtoDelete:
print ('Removing carID ' + str(carID) + ' from list of trackers.')
print ('Removing carID ' + str(carID) + ' previous location.')
print ('Removing carID ' + str(carID) + ' current location.')
carTracker.pop(carID, None)
carLocation1.pop(carID, None)
carLocation2.pop(carID, None)
        # detection: run the HOG pedestrian detector every 10th frame
        if not (frameCounter % 10):
            # gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
                                                    padding=(8, 8), scale=1.05)
for (_x, _y, _w, _h) in rects:
x = int(_x)
y = int(_y)
w = int(_w)
h = int(_h)
x_bar = x + 0.5 * w
y_bar = y + 0.5 * h
matchCarID = None
for carID in carTracker.keys():
trackedPosition = carTracker[carID].get_position()
t_x = int(trackedPosition.left())
t_y = int(trackedPosition.top())
t_w = int(trackedPosition.width())
t_h = int(trackedPosition.height())
t_x_bar = t_x + 0.5 * t_w
t_y_bar = t_y + 0.5 * t_h
                    # treat detection and tracker as the same object if each
                    # box's centre falls inside the other box
                    if ((t_x <= x_bar <= (t_x + t_w)) and (t_y <= y_bar <= (t_y + t_h)) and (
                            x <= t_x_bar <= (x + w)) and (y <= t_y_bar <= (y + h))):
                        matchCarID = carID
# if new car exists
if matchCarID is None:
print ('Creating new tracker ' + str(currentCarID))
tracker = dlib.correlation_tracker()
tracker.start_track(image, dlib.rectangle(x, y, x + w, y + h))
carTracker[currentCarID] = tracker
carLocation1[currentCarID] = [x, y, x + w, y + h]
currentCarID = currentCarID + 1
# cv2.line(resultImage, line_pass_right[0], line_pass_right[1], (255, 255, 0), 5)
# cv2.line(resultImage, line_pass_left[0], line_pass_left[1], (255, 255, 0), 5)
# cv2.line(resultImage, line_down[0], line_down[1], (255, 0, 0), 5)
#
# tracking
for carID in carTracker.keys():
trackedPosition = carTracker[carID].get_position()
t_x = int(trackedPosition.left())
t_y = int(trackedPosition.top())
t_w = int(trackedPosition.width())
t_h = int(trackedPosition.height())
center_x = t_x + t_w // 2
center_y = t_y + t_h // 2
# rects.append((t_x, t_y, t_w, t_h))
# cv2.circle(resultImage, (center_x, center_y ),4, (0, 255, 255), -1)
"""
if ((line_pass_left[0][0] <= center_x <= line_pass_left[1][0] + 40) and ( line_pass_left[0][1] <= center_y <= line_pass_left[1][1])) or ((line_pass_right[0][0] - 10 <= center_x <= line_pass_right[1][0]) and ( line_pass_right[0][1] <= center_y <= line_pass_right[1][1])):
counter_right.append(carID)
cv2.putText(resultImage, "Pedestrian", ( center_x - 10, center_y - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)
elif (line_down[0][0] <= center_x <= line_down[1][0]) and (line_down[0][1] - 20 <= center_y <= line_down[0][1] + 20):
counter_moto.append(carID)
cv2.putText(resultImage, "moto", (center_x - 10, center_y - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 250, 250), 2)
else:
counter_wrong.append(carID)
cv2.putText(resultImage, "Pedestrian", (center_x - 10, center_y - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
# speed estimation
carLocation2[carID] = [t_x, t_y, t_w, t_h]
info = [
("walking in right place", len(set(counter_right))),
("moto", len(set(counter_moto))),
("walking in wrong place", len(set(counter_wrong))),
]
for (i, (k, v)) in enumerate(info):
text = "{}: {} people".format(k, v)
cv2.putText(resultImage, text, (670, HEIGHT - ((i * 20) + 20)),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
"""
carLocation2[carID] = [t_x, t_y, t_w, t_h]
end_time = time.time()
if not (end_time == start_time):
fps = 1.0 / (end_time - start_time)
# cv2.putText(resultImage, 'FPS: ' + str(int(fps)), (620, 30),cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
for i in carLocation1.keys():
            if frameCounter % 1 == 0:  # every frame; raise the modulus to sample less often
[x1, y1, w1, h1] = carLocation1[i]
[x2, y2, w2, h2] = carLocation2[i]
# print 'previous location: ' + str(carLocation1[i]) + ', current location: ' + str(carLocation2[i])
carLocation1[i] = [x2, y2, w2, h2]
# print 'new previous location: ' + str(carLocation1[i])
if [x1, y1, w1, h1] != [x2, y2, w2, h2]:
                    if speed[i] is None or speed[i] == 0:
speed[i] = estimateSpeed([x1, y1, w1, h1], [x2, y2, w2, h2])
# if y1 > 275 and y1 < 285:
if (33 <= speed[i] <= 45) and y1 >= 100:
cv2.circle(resultImage, (x1 + w1 // 2, y1 + h1 // 2), 4, (0, 255, 255), -1)
cv2.putText(resultImage, "P:" + str(int(speed[i])) + " km/hr", (int(x1 + w1 / 2), int(y1 - 5)),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)
counter_right.append(i)
if speed[i] > 45:
cv2.circle(resultImage, (x1 + w1 // 2, y1 + h1 // 2), 4, (0, 255, 255), -1)
cv2.putText(resultImage, "Moto" + str(int(speed[i])) + "km/hr", (int(x1 + w1 / 2), int(y1 - 5)),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2)
counter_moto.append(i)
info = [
("walking in right place", len(set(counter_right))),
("moto", len(set(counter_moto)))
]
for (i, (k, v)) in enumerate(info):
text = "{}: {} people".format(k, v)
cv2.putText(resultImage, text, (670, HEIGHT - ((i * 20) + 20)),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
# print ('CarID ' + str(i) + ': speed is ' + str("%.2f" % round(speed[i], 0)) + ' km/h.\n')
# else:
# cv2.putText(resultImage, "Far Object", (int(x1 + w1/2), int(y1)),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
# print ('CarID ' + str(i) + ' Location1: ' + str(carLocation1[i]) + ' Location2: ' + str(carLocation2[i]) + ' speed is ' + str("%.2f" % round(speed[i], 0)) + ' km/h.\n')
cv2.imshow('result', resultImage)
# Write the frame into the file 'output.avi'
# out.write(resultImage)
out.write(resultImage)
if cv2.waitKey(33) == 27:
break
    # flush the writer and release the video pointers
    out.release()
    video.release()
    cv2.destroyAllWindows()
if __name__ == '__main__':
trackMultipleObjects()
The video I used was clipped from YouTube: click to view.
I'm not sure whether readers in mainland China will be able to open it.
I will package up and upload my code files later. If you have any questions about the code, feel free to leave a comment below so we can discuss it together!