Meanshift算法简单介绍:任意选定的初始迭代点,画一个半径记为H的蓝色圆,将其内所有的点与圆心相连,组成一个向量集,然后对此向量集进行向量相加,相加的结果如黑色向量所示,其终点指向下图所示的蓝色点,则下一次迭代以该蓝色点为圆心,H为半径画圆,然后求这个圆内以圆心为起点所有向量的和。如此迭代下去,圆的中心点为收敛于一个固定的点,也就是概率密度最大的地方。所以 均值漂移算法本质上是一种基于梯度的优化算法。
Meanshift的opencv实现:首先需要在视频画面中选取一个目标,画出初始边框,计算直方图,然后通过直方图反向投影,用于Meanshift计算。计算直方图时,仅考虑Hue视角。为防止光线较暗造成的错误,可以用cv.inRange()函数滤除。
import numpy as np
import cv2 as cv
cap = cv.VideoCapture('slow.flv')
# take first frame of the video
ret,frame = cap.read()
# setup initial location of window
r,h,c,w = 250,90,400,125 # simply hardcoded the values
track_window = (c,r,w,h)
# set up the ROI for tracking
roi = frame[r:r+h, c:c+w]
hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV)
mask = cv.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.)))
roi_hist = cv.calcHist([hsv_roi],[0],mask,[180],[0,180])
cv.normalize(roi_hist,roi_hist,0,255,cv.NORM_MINMAX)
# Setup the termination criteria, either 10 iteration or move by atleast 1 pt
term_crit = ( cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1 )
while(1):
ret ,frame = cap.read()
if ret == True:
hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
dst = cv.calcBackProject([hsv],[0],roi_hist,[0,180],1)
# apply meanshift to get the new location
ret, track_window = cv.meanShift(dst, track_window, term_crit)
# Draw it on image
x,y,w,h = track_window
img2 = cv.rectangle(frame, (x,y), (x+w,y+h), 255,2)
cv.imshow('img2',img2)
k = cv.waitKey(60) & 0xff
if k == 27:
break
else:
cv.imwrite(chr(k)+".jpg",img2)
else:
break
cv.destroyAllWindows()
cap.release()
在上例中,对目标跟踪的框带下不会随着目标的远近大小的不同,而改变目标框的大小,这是不合理的,因此需要动态调整目标框的大小。
Camshift它是MeanShift算法的改进,称为连续自适应的MeanShift算法,CamShift算法的全称是"Continuously Adaptive Mean-SHIFT",它的基本思想是视频图像的所有帧作MeanShift运算,并将上一帧的结果(即Search Window的中心和大小)作为下一帧MeanShift算法的Search Window的初始值,如此迭代下去。
import numpy as np
import cv2 as cv
cap = cv.VideoCapture('slow.flv')
# take first frame of the video
ret,frame = cap.read()
# setup initial location of window
r,h,c,w = 250,90,400,125 # simply hardcoded the values
track_window = (c,r,w,h)
# set up the ROI for tracking
roi = frame[r:r+h, c:c+w]
hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV)
mask = cv.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.)))
roi_hist = cv.calcHist([hsv_roi],[0],mask,[180],[0,180])
cv.normalize(roi_hist,roi_hist,0,255,cv.NORM_MINMAX)
# Setup the termination criteria, either 10 iteration or move by atleast 1 pt
term_crit = ( cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1 )
while(1):
ret ,frame = cap.read()
if ret == True:
hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
dst = cv.calcBackProject([hsv],[0],roi_hist,[0,180],1)
# apply meanshift to get the new location
ret, track_window = cv.CamShift(dst, track_window, term_crit)
# Draw it on image
pts = cv.boxPoints(ret)
pts = np.int0(pts)
img2 = cv.polylines(frame,[pts],True, 255,2)
cv.imshow('img2',img2)
k = cv.waitKey(60) & 0xff
if k == 27:
break
else:
cv.imwrite(chr(k)+".jpg",img2)
else:
break
cv.destroyAllWindows()
cap.release()
光流的概念是Gibson在1950年首先提出来的。它是空间运动物体在观察成像平面上的像素运动的瞬时速度,是利用图像序列中像素在时间域上的变化以及相邻帧之间的相关性来找到上一帧跟当前帧之间存在的对应关系,从而计算出相邻帧之间物体的运动信息的一种方法。一般而言,光流是由于场景中前景目标本身的移动、相机的运动,或者两者的共同运动所产生的。
光流(Optical Flow)是一种研究图像对齐的算法,一般包括两大类:稀疏光流和稠密光流。顾名思义,稀疏光流就是研究图像中稀疏点的光流,这些点一般是角点;稠密光流则是研究图像中所有点的偏移量。
import numpy as np
import cv2 as cv
cap = cv.VideoCapture('slow.flv')
# params for ShiTomasi corner detection
feature_params = dict( maxCorners = 100,
qualityLevel = 0.3,
minDistance = 7,
blockSize = 7 )
# Parameters for lucas kanade optical flow
lk_params = dict( winSize = (15,15),
maxLevel = 2,
criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03))
# Create some random colors
color = np.random.randint(0,255,(100,3))
# Take first frame and find corners in it
ret, old_frame = cap.read()
old_gray = cv.cvtColor(old_frame, cv.COLOR_BGR2GRAY)
p0 = cv.goodFeaturesToTrack(old_gray, mask = None, **feature_params)
# Create a mask image for drawing purposes
mask = np.zeros_like(old_frame)
while(1):
ret,frame = cap.read()
frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
# calculate optical flow
p1, st, err = cv.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
# Select good points
good_new = p1[st==1]
good_old = p0[st==1]
# draw the tracks
for i,(new,old) in enumerate(zip(good_new,good_old)):
a,b = new.ravel()
c,d = old.ravel()
mask = cv.line(mask, (a,b),(c,d), color[i].tolist(), 2)
frame = cv.circle(frame,(a,b),5,color[i].tolist(),-1)
img = cv.add(frame,mask)
cv.imshow('frame',img)
k = cv.waitKey(30) & 0xff
if k == 27:
break
# Now update the previous frame and previous points
old_gray = frame_gray.copy()
p0 = good_new.reshape(-1,1,2)
cv.destroyAllWindows()
cap.release()
CalcOpticalFlowFarneback()函数是利用用Gunnar Farneback的算法计算全局性的稠密光流算法(即图像上所有像素点的光流都计算出来),由于要计算图像上所有点的光流,故计算耗时,速度慢。
import cv2 as cv
import numpy as np
cap = cv.VideoCapture("vtest.avi")
ret, frame1 = cap.read()
prvs = cv.cvtColor(frame1,cv.COLOR_BGR2GRAY)
hsv = np.zeros_like(frame1)
hsv[...,1] = 255
while(1):
ret, frame2 = cap.read()
next = cv.cvtColor(frame2,cv.COLOR_BGR2GRAY)
flow = cv.calcOpticalFlowFarneback(prvs,next, None, 0.5, 3, 15, 3, 5, 1.2, 0)
mag, ang = cv.cartToPolar(flow[...,0], flow[...,1])
hsv[...,0] = ang*180/np.pi/2
hsv[...,2] = cv.normalize(mag,None,0,255,cv.NORM_MINMAX)
bgr = cv.cvtColor(hsv,cv.COLOR_HSV2BGR)
cv.imshow('frame2',bgr)
k = cv.waitKey(30) & 0xff
if k == 27:
break
elif k == ord('s'):
cv.imwrite('opticalfb.png',frame2)
cv.imwrite('opticalhsv.png',bgr)
prvs = next
cap.release()
cv.destroyAllWindows()
主要是从背景图像中对目标进行跟踪。
import numpy as np
import cv2 as cv
cap = cv.VideoCapture('vtest.avi')
fgbg = cv.createBackgroundSubtractorMOG()
while(1):
ret, frame = cap.read()
fgmask = fgbg.apply(frame)
cv.imshow('frame',fgmask)
k = cv.waitKey(30) & 0xff
if k == 27:
break
cap.release()
cv.destroyAllWindows()
-函数原型:createBackgroundSubtractorMOG2 ( int history = 500,double varThreshold = 16,bool detectShadows = true )
import numpy as np
import cv2 as cv
cap = cv.VideoCapture('vtest.avi')
fgbg = cv.createBackgroundSubtractorMOG2()
while(1):
ret, frame = cap.read()
fgmask = fgbg.apply(frame)
cv.imshow('frame',fgmask)
k = cv.waitKey(30) & 0xff
if k == 27:
break
cap.release()
cv.destroyAllWindows()
import numpy as np
import cv2 as cv
cap = cv.VideoCapture('vtest.avi')
kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE,(3,3))
fgbg = cv.createBackgroundSubtractorGMG()
while(1):
ret, frame = cap.read()
fgmask = fgbg.apply(frame)
fgmask = cv.morphologyEx(fgmask, cv.MORPH_OPEN, kernel)
cv.imshow('frame',fgmask)
k = cv.waitKey(30) & 0xff
if k == 27:
break
cap.release()
cv.destroyAllWindows()
这篇更难学了,原理性的东西写不了多少,只能总结一下用法了。