直接接到一个小项目开发,是做公园的人流量统计,涉及到很多方面的知识,小编在这里记录一下流程,涉及行人识别,多目标追踪,匹配等知识。
参考国外博文,这是一个opencv学习的很好的网站。小编就是从这位大佬博客入手学习的计算机视觉,特此感谢。
https://www.pyimagesearch.com/2018/07/23/simple-object-tracking-with-opencv/
1、目标检测
首先采用MobileNet-ssd作为检测器,检测出目标。小编在之前博客已经写过这个,可参考
https://blog.csdn.net/xiao__run/article/details/80643919
检测出来我们可以看到如下图像。
MobileNet-SSD的效果一般,我们可以看到重叠的人当做了一个目标。
2、目标追踪
在这里追踪方法特别多,小编就将那位大佬的两种思路讲一讲
1、先贴代码,注释写在代码中。
# import the necessary packages
from scipy.spatial import distance as dist
#科学计算库,计算两个集合的欧式距离
from collections import OrderedDict
#有序字典
import numpy as np
class CentroidTracker:
def __init__(self, maxDisappeared=50, maxDistance=50):
# initialize the next unique object ID along with two ordered
# dictionaries used to keep track of mapping a given object
# ID to its centroid and number of consecutive frames it has
# been marked as "disappeared", respectively
self.nextObjectID = 0
#编号
self.objects = OrderedDict()
#字典,用来储存目标的编号与坐标
self.disappeared = OrderedDict()
#目标消失的帧数
# store the number of maximum consecutive frames a given
# object is allowed to be marked as "disappeared" until we
# need to deregister the object from tracking
self.maxDisappeared = maxDisappeared
# store the maximum distance between centroids to associate
# an object -- if the distance is larger than this maximum
# distance we'll start to mark the object as "disappeared"
self.maxDistance = maxDistance
def register(self, centroid):
# when registering an object we use the next available object
# ID to store the centroid
#新来的目标加入到字典
self.objects[self.nextObjectID] = centroid
self.disappeared[self.nextObjectID] = 0
self.nextObjectID += 1
#目标消失,删除字典里的目标
def deregister(self, objectID):
# to deregister an object ID we delete the object ID from
# both of our respective dictionaries
del self.objects[objectID]
del self.disappeared[objectID]
#最重要的一步,更新目标,即追踪目标
def update(self, rects):
# check to see if the list of input bounding box rectangles
# is empty
#首先判断下,如果木有框,则将消失的目标帧数加1,超过设置阈值,直接注销掉目标。
if len(rects) == 0:
# loop over any existing tracked objects and mark them
# as disappeared
for objectID in list(self.disappeared.keys()):
self.disappeared[objectID] += 1
# if we have reached a maximum number of consecutive
# frames where a given object has been marked as
# missing, deregister it
if self.disappeared[objectID] > self.maxDisappeared:
self.deregister(objectID)
# return early as there are no centroids or tracking info
# to update
return self.objects
# initialize an array of input centroids for the current frame
#创建一个Nx2的矩阵,其中N是这一帧目标的个数
inputCentroids = np.zeros((len(rects), 2), dtype="int")
# loop over the bounding box rectangles
for (i, (startX, startY, endX, endY)) in enumerate(rects):
# use the bounding box coordinates to derive the centroid
cX = int((startX + endX) / 2.0)
cY = int((startY + endY) / 2.0)
inputCentroids[i] = (cX, cY)
#将这一帧目标的个数的中心点加入字典。
# if we are currently not tracking any objects take the input
# centroids and register each of them
#若未追踪任何目标,则加入追踪
if len(self.objects) == 0:
for i in range(0, len(inputCentroids)):
self.register(inputCentroids[i])
# otherwise, are are currently tracking objects so we need to
# try to match the input centroids to existing object
# centroids
else:
# grab the set of object IDs and corresponding centroids
objectIDs = list(self.objects.keys())
objectCentroids = list(self.objects.values())
# compute the distance between each pair of object
# centroids and input centroids, respectively -- our
# goal will be to match an input centroid to an existing
# object centroid
D = dist.cdist(np.array(objectCentroids), inputCentroids)
#计算目标的两个集合的欧氏距离
# in order to perform this matching we must (1) find the
# smallest value in each row and then (2) sort the row
# indexes based on their minimum values so that the row
# with the smallest value as at the *front* of the index
# list
rows = D.min(axis=1).argsort()
#找出每行最小值,并求出其索引,返回列表
# next, we perform a similar process on the columns by
# finding the smallest value in each column and then
# sorting using the previously computed row index list
#
cols = D.argmin(axis=1)[rows]
#找出最近目标的匹配
# in order to determine if we need to update, register,
# or deregister an object we need to keep track of which
# of the rows and column indexes we have already examined
usedRows = set()
usedCols = set()
#建立字典
# loop over the combination of the (row, column) index
# tuples
for (row, col) in zip(rows, cols):
# if we have already examined either the row or
# column value before, ignore it
if row in usedRows or col in usedCols:
continue
#判断目标是否已经分配ID
# if the distance between centroids is greater than
# the maximum distance, do not associate the two
# centroids to the same object
#设置最大距离
if D[row, col] > self.maxDistance:
continue
# otherwise, grab the object ID for the current row,
# set its new centroid, and reset the disappeared
# counter
#正式分配ID,并将该ID对应的中心点赋予原来的目标,即更新目标点
objectID = objectIDs[row]
self.objects[objectID] = inputCentroids[col]
self.disappeared[objectID] = 0
# indicate that we have examined each of the row and
# column indexes, respectively
usedRows.add(row)
usedCols.add(col)
# compute both the row and column index we have NOT yet
# examined
#寻找未被分配的目标。
unusedRows = set(range(0, D.shape[0])).difference(usedRows)
unusedCols = set(range(0, D.shape[1])).difference(usedCols)
# in the event that the number of object centroids is
# equal or greater than the number of input centroids
# we need to check and see if some of these objects have
# potentially disappeared
#判断目标是消失还是增多,如果是消失,开始计数消失帧数,如果多余,则新加入一个目标点
if D.shape[0] >= D.shape[1]:
# loop over the unused row indexes
for row in unusedRows:
# grab the object ID for the corresponding row
# index and increment the disappeared counter
objectID = objectIDs[row]
self.disappeared[objectID] += 1
# check to see if the number of consecutive
# frames the object has been marked "disappeared"
# for warrants deregistering the object
if self.disappeared[objectID] > self.maxDisappeared:
self.deregister(objectID)
# otherwise, if the number of input centroids is greater
# than the number of existing object centroids we need to
# register each new input centroid as a trackable object
else:
for col in unusedCols:
self.register(inputCentroids[col])
# return the set of trackable objects
return self.objects
下面开始调用追踪的类,首先使用目标检测得出目标候选框,再加候选框加入到追踪算法里。
追踪代码
# USAGE
# python object_tracker.py --prototxt deploy.prototxt --model res10_300x300_ssd_iter_140000.caffemodel
# import the necessary packages
from pyimagesearch.centroidtracker import CentroidTracker
from imutils.video import VideoStream
import numpy as np
import argparse
import imutils
import time
import cv2
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", required=True,
help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
help="minimum probability to filter weak detections")
args = vars(ap.parse_args())
# initialize our centroid tracker and frame dimensions
ct = CentroidTracker()
(H, W) = (None, None)
# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])
# initialize the video stream and allow the camera sensor to warmup
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)
# loop over the frames from the video stream
while True:
# read the next frame from the video stream and resize it
frame = vs.read()
frame = imutils.resize(frame, width=400)
# if the frame dimensions are None, grab them
if W is None or H is None:
(H, W) = frame.shape[:2]
# construct a blob from the frame, pass it through the network,
# obtain our output predictions, and initialize the list of
# bounding box rectangles
blob = cv2.dnn.blobFromImage(frame, 1.0, (W, H),
(104.0, 177.0, 123.0))
net.setInput(blob)
detections = net.forward()
rects = []
# loop over the detections
for i in range(0, detections.shape[2]):
# filter out weak detections by ensuring the predicted
# probability is greater than a minimum threshold
if detections[0, 0, i, 2] > args["confidence"]:
# compute the (x, y)-coordinates of the bounding box for
# the object, then update the bounding box rectangles list
box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])
rects.append(box.astype("int"))
# draw a bounding box surrounding the object so we can
# visualize it
(startX, startY, endX, endY) = box.astype("int")
cv2.rectangle(frame, (startX, startY), (endX, endY),
(0, 255, 0), 2)
# update our centroid tracker using the computed set of bounding
# box rectangles
objects = ct.update(rects)
# loop over the tracked objects
for (objectID, centroid) in objects.items():
# draw both the ID of the object and the centroid of the
# object on the output frame
text = "ID {}".format(objectID)
cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)
# show the output frame
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
# if the `q` key was pressed, break from the loop
if key == ord("q"):
break
# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()
下面我们看下效果,这里我先借用大佬的图,人脸识别的模型。
以上就是多目标追踪最为基本的模型了。同时,替换掉检测模型就可以完成对其他目标的追踪了,这里最难的应该就属于目标匹配了。
接下来我来实现一个人流量的统计;里面有很多思想值得学习,其实在几个月前我写过yolov3加卡尔曼滤波的多目标追踪,地址为
https://blog.csdn.net/xiao__run/article/details/84374959
接下来我学习大佬的方案,使用dlib的相关滤波做多目标追踪。
先上代码吧,有时间我再进行解释,这些方案都有个很大的缺点就是非常依赖检测器的好坏。
# USAGE
# To read and write back out to video:
# python people_counter.py --prototxt mobilenet_ssd/MobileNetSSD_deploy.prototxt \
# --model mobilenet_ssd/MobileNetSSD_deploy.caffemodel --input videos/example_01.mp4 \
# --output output/output_01.avi
#
# To read from webcam and write back out to disk:
# python people_counter.py --prototxt mobilenet_ssd/MobileNetSSD_deploy.prototxt \
# --model mobilenet_ssd/MobileNetSSD_deploy.caffemodel \
# --output output/webcam_output.avi
# import the necessary packages
from pyimagesearch.centroidtracker import CentroidTracker
from pyimagesearch.trackableobject import TrackableObject
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import dlib
import cv2
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", required=True,
help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
help="path to Caffe pre-trained model")
ap.add_argument("-i", "--input", type=str,
help="path to optional input video file")
ap.add_argument("-o", "--output", type=str,
help="path to optional output video file")
ap.add_argument("-c", "--confidence", type=float, default=0.4,
help="minimum probability to filter weak detections")
ap.add_argument("-s", "--skip-frames", type=int, default=10,
help="# of skip frames between detections")
args = vars(ap.parse_args())
# initialize the list of class labels MobileNet SSD was trained to
# detect
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
"dog", "horse", "motorbike", "person", "pottedplant", "sheep",
"sofa", "train", "tvmonitor"]
# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])
# if a video path was not supplied, grab a reference to the webcam
if not args.get("input", False):
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)
# otherwise, grab a reference to the video file
else:
print("[INFO] opening video file...")
vs = cv2.VideoCapture(args["input"])
# initialize the video writer (we'll instantiate later if need be)
writer = None
# initialize the frame dimensions (we'll set them as soon as we read
# the first frame from the video)
W = None
H = None
# instantiate our centroid tracker, then initialize a list to store
# each of our dlib correlation trackers, followed by a dictionary to
# map each unique object ID to a TrackableObject
ct = CentroidTracker(maxDisappeared=40, maxDistance=50)
trackers = []
trackableObjects = {}
# initialize the total number of frames processed thus far, along
# with the total number of objects that have moved either up or down
totalFrames = 0
totalDown = 0
totalUp = 0
# start the frames per second throughput estimator
fps = FPS().start()
# loop over frames from the video stream
while True:
# grab the next frame and handle if we are reading from either
# VideoCapture or VideoStream
frame = vs.read()
frame = frame[1] if args.get("input", False) else frame
# if we are viewing a video and we did not grab a frame then we
# have reached the end of the video
if args["input"] is not None and frame is None:
break
# resize the frame to have a maximum width of 500 pixels (the
# less data we have, the faster we can process it), then convert
# the frame from BGR to RGB for dlib
frame = imutils.resize(frame, width=500)
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# if the frame dimensions are empty, set them
if W is None or H is None:
(H, W) = frame.shape[:2]
# if we are supposed to be writing a video to disk, initialize
# the writer
if args["output"] is not None and writer is None:
fourcc = cv2.VideoWriter_fourcc(*"MJPG")
writer = cv2.VideoWriter(args["output"], fourcc, 30,
(W, H), True)
# initialize the current status along with our list of bounding
# box rectangles returned by either (1) our object detector or
# (2) the correlation trackers
status = "Waiting"
rects = []
# check to see if we should run a more computationally expensive
# object detection method to aid our tracker
if totalFrames % args["skip_frames"] == 0:
# set the status and initialize our new set of object trackers
status = "Detecting"
trackers = []
# convert the frame to a blob and pass the blob through the
# network and obtain the detections
blob = cv2.dnn.blobFromImage(frame, 0.007843, (W, H), 127.5)
net.setInput(blob)
detections = net.forward()
# loop over the detections
for i in np.arange(0, detections.shape[2]):
# extract the confidence (i.e., probability) associated
# with the prediction
confidence = detections[0, 0, i, 2]
# filter out weak detections by requiring a minimum
# confidence
if confidence > args["confidence"]:
# extract the index of the class label from the
# detections list
idx = int(detections[0, 0, i, 1])
# if the class label is not a person, ignore it
if CLASSES[idx] != "person":
continue
# compute the (x, y)-coordinates of the bounding box
# for the object
box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])
(startX, startY, endX, endY) = box.astype("int")
# construct a dlib rectangle object from the bounding
# box coordinates and then start the dlib correlation
# tracker
tracker = dlib.correlation_tracker()
rect = dlib.rectangle(startX, startY, endX, endY)
tracker.start_track(rgb, rect)
# add the tracker to our list of trackers so we can
# utilize it during skip frames
trackers.append(tracker)
# otherwise, we should utilize our object *trackers* rather than
# object *detectors* to obtain a higher frame processing throughput
else:
# loop over the trackers
for tracker in trackers:
# set the status of our system to be 'tracking' rather
# than 'waiting' or 'detecting'
status = "Tracking"
# update the tracker and grab the updated position
tracker.update(rgb)
pos = tracker.get_position()
# unpack the position object
startX = int(pos.left())
startY = int(pos.top())
endX = int(pos.right())
endY = int(pos.bottom())
# add the bounding box coordinates to the rectangles list
rects.append((startX, startY, endX, endY))
# draw a horizontal line in the center of the frame -- once an
# object crosses this line we will determine whether they were
# moving 'up' or 'down'
cv2.line(frame, (W//2, 0), (W//2, H ), (0, 255, 255), 2)
# use the centroid tracker to associate the (1) old object
# centroids with (2) the newly computed object centroids
objects = ct.update(rects)
# loop over the tracked objects
for (objectID, centroid) in objects.items():
# check to see if a trackable object exists for the current
# object ID
to = trackableObjects.get(objectID, None)
# if there is no existing trackable object, create one
if to is None:
to = TrackableObject(objectID, centroid)
# otherwise, there is a trackable object so we can utilize it
# to determine direction
else:
# the difference between the y-coordinate of the *current*
# centroid and the mean of *previous* centroids will tell
# us in which direction the object is moving (negative for
# 'up' and positive for 'down')
y = [c[0] for c in to.centroids]
direction = centroid[0] - np.mean(y)
to.centroids.append(centroid)
# check to see if the object has been counted or not
if not to.counted:
# if the direction is negative (indicating the object
# is moving up) AND the centroid is above the center
# line, count the object
if direction < 0 and centroid[0] < W // 2:
totalUp += 1
to.counted = True
# if the direction is positive (indicating the object
# is moving down) AND the centroid is below the
# center line, count the object
elif direction > 0 and centroid[0] > W // 2:
totalDown += 1
to.counted = True
# store the trackable object in our dictionary
trackableObjects[objectID] = to
# draw both the ID of the object and the centroid of the
# object on the output frame
text = "ID {}".format(objectID)
cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)
# construct a tuple of information we will be displaying on the
# frame
info = [
("Up", totalUp),
("Down", totalDown),
("Status", status),
]
# loop over the info tuples and draw them on our frame
for (i, (k, v)) in enumerate(info):
text = "{}: {}".format(k, v)
cv2.putText(frame, text, (10, H - ((i * 20) + 20)),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
# check to see if we should write the frame to disk
if writer is not None:
writer.write(frame)
# show the output frame
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
# if the `q` key was pressed, break from the loop
if key == ord("q"):
break
# increment the total number of frames processed thus far and
# then update the FPS counter
totalFrames += 1
fps.update()
cv2.waitKey(50)
# stop the timer and display FPS information
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
# check to see if we need to release the video writer pointer
if writer is not None:
writer.release()
# if we are not using a video file, stop the camera video stream
if not args.get("input", False):
vs.stop()
# otherwise, release the video file pointer
else:
vs.release()
# close any open windows
cv2.destroyAllWindows()