很多初学 OpenCV 的朋友可能会茫无头绪,不知道怎么和 Python 搭配来做一些简单的图像处理工作。在这里分享一个自己做过的小课题:公交上下车流量检测中的一部分——人头检测。
关于 HOG 特征和 SVM 原理的讲解博客,前人已经做得相当完备了,这里就不再讲述原理,而是直接对我之前编写的代码进行详解,帮助初学者快速了解 OpenCV 的常用函数,以及如何用 Python 编写并实现一个简单的目标检测程序。
大佬到这就可以结束了
在看代码之前,推荐点击:这是我针对我写的代码,总结的算法思路
1 train
import cv2
import numpy as np
import random
import xml.dom.minidom
from non_maximum import non_max_suppression_fast as nms
def gamma_trans(img, gamma):
    """Apply gamma correction to *img* through a 256-entry lookup table.

    Every intensity ``v`` in ``0..255`` is mapped to
    ``round((v / 255) ** gamma * 255)`` and the table is applied to the
    image with ``cv2.LUT``.

    Args:
        img: Image handed to ``cv2.LUT``; the table has one entry per
            8-bit intensity value.
        gamma: Exponent of the power-law transform.

    Returns:
        The transformed image as returned by ``cv2.LUT``.
    """
    levels = np.array([np.power(v / 255.0, gamma) * 255.0 for v in range(256)])
    lookup = np.round(levels).astype(np.uint8)
    return cv2.LUT(img, lookup)
def load_images(dirname, size):
    """Load the images ``1.jpg`` .. ``<size>.jpg`` stored under *dirname*.

    Args:
        dirname: String prefix of the image files. It is concatenated
            directly with the file number, so it must end with a path
            separator.
        size: Number of consecutively numbered images to read.

    Returns:
        A list of length *size* whose entry ``i`` is whatever
        ``cv2.imread`` returned for file ``i + 1``. Per the OpenCV
        documentation that is ``None`` for a file that cannot be read;
        such entries are kept on purpose so that list index ``i`` keeps
        matching file number ``i + 1``, which ``extract_images`` relies on
        when pairing images with annotation files.
    """
    return [cv2.imread(dirname + str(index) + '.jpg')
            for index in range(1, size + 1)]
def extract_images(path, img_list, size, wsize=(80, 80)):
    """Crop one fixed-size sample per image at its annotated position.

    For image ``i`` the annotation file ``<path><i + 1>.xml`` is parsed and
    the first ``xmin`` / ``ymin`` values found in it give the top-left
    corner of the crop. The crop always has the size *wsize*; the
    annotated ``xmax`` / ``ymax`` are not used.

    Args:
        path: String prefix of the annotation files (must end with a path
            separator, it is concatenated directly with the file number).
        img_list: Images indexed so that ``img_list[i]`` belongs to
            annotation file ``i + 1``.
        size: Number of image/annotation pairs to process.
        wsize: ``(width, height)`` of the crop.

    Returns:
        A list with the crops that have exactly the requested size. Images
        that are ``None`` (unreadable) and crops truncated by the image
        border are skipped.
    """
    win_w, win_h = wsize
    extract_img = []
    for i in range(size):
        image = img_list[i]
        if image is None:
            # Unreadable image: nothing to crop, skip instead of crashing.
            continue
        doc = xml.dom.minidom.parse(path + str(i + 1) + '.xml')
        root = doc.documentElement
        xmin_node = root.getElementsByTagName("xmin")[0]
        ymin_node = root.getElementsByTagName("ymin")[0]
        # Coordinates may be written as floats in the XML, hence int(float()).
        xmin = int(float(xmin_node.childNodes[0].nodeValue))
        ymin = int(float(ymin_node.childNodes[0].nodeValue))
        roi = image[ymin:ymin + win_h, xmin:xmin + win_w]
        # Compare against the requested window, not a hard-coded 80x80,
        # so that a non-default wsize does not discard every sample.
        if roi.shape[0] != win_h or roi.shape[1] != win_w:
            continue
        extract_img.append(roi)
    return extract_img
def extract_neg_img(dirname, extract_neg, wsize=(80, 80), num_images=15, per_image=10):
    """Append randomly placed crops from background images to *extract_neg*.

    The images ``1.jpg`` .. ``<num_images>.jpg`` under *dirname* are read
    and *per_image* windows of size *wsize* are cut out of each at random
    positions.

    Args:
        dirname: String prefix of the image files (must end with a path
            separator, it is concatenated directly with the file number).
        extract_neg: List that receives the crops; it is modified in place.
        wsize: ``(width, height)`` of each crop.
        num_images: How many numbered images to sample from.
        per_image: How many crops to take from each image.

    Returns:
        The same *extract_neg* list, for convenience.
    """
    win_w, win_h = wsize
    for index in range(1, num_images + 1):
        img = cv2.imread(dirname + str(index) + '.jpg')
        if img is None:
            # File missing or unreadable: nothing to sample from.
            continue
        rows, cols = img.shape[:2]
        if rows - win_h < 1 or cols - win_w < 1:
            # Image too small to hold a full window.
            continue
        for _ in range(per_image):
            # Bounds come from the actual image instead of a fixed 288x352
            # frame. The row offset is drawn first and both start at 1, so
            # for 288x352 images and square windows the random sequence is
            # the same as before.
            top = random.randint(1, rows - win_h)
            left = random.randint(1, cols - win_w)
            extract_neg.append(img[top:top + win_h, left:left + win_w])
    return extract_neg
def computeHOGs(img_list, gradient_list, wsize=(80, 80)):
    """Compute a HOG descriptor for every image and append it to *gradient_list*.

    Each image is converted to grayscale, histogram-equalized and
    gamma-corrected (gamma 0.8) before the descriptor is computed; the
    detection code has to apply the same preprocessing.

    Args:
        img_list: BGR images to describe.
        gradient_list: List that receives one descriptor per image; it is
            modified in place.
        wsize: ``(width, height)`` of the HOG detection window. It was
            previously ignored in favour of a hard-coded (80, 80).
            NOTE(review): with the fixed 40x40 block and 8x8 stride,
            OpenCV presumably only accepts window sizes compatible with
            those values - confirm before using a non-default size.

    Returns:
        The same *gradient_list*, for convenience.
    """
    hog = cv2.HOGDescriptor(tuple(wsize), (40, 40), (8, 8), (8, 8), 9)
    for img in img_list:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Equalize so very dark or very bright crops become comparable.
        gray = cv2.equalizeHist(gray)
        gray = gamma_trans(gray, 0.8)
        gradient_list.append(hog.compute(gray))
    return gradient_list
def sliding_window(image, stepSize, windowSize):
    """Yield ``(x, y, patch)`` for a window slid over *image* in raster order.

    Args:
        image: Array indexed as ``image[row, column]``.
        stepSize: Stride in pixels, used for both directions.
        windowSize: ``(width, height)`` of the window.

    Yields:
        Tuples ``(x, y, patch)`` where ``(x, y)`` is the top-left corner.
        Near the right and bottom borders the slice is simply cut off at
        the image edge, so ``patch`` can be smaller than *windowSize*;
        callers are expected to filter those out.
    """
    for top in range(0, image.shape[0], stepSize):
        bottom = top + windowSize[1]
        for left in range(0, image.shape[1], stepSize):
            patch = image[top:bottom, left:left + windowSize[0]]
            yield (left, top, patch)
def resize(img, scaleFactor):
    """Scale *img* by ``1 / scaleFactor`` using area interpolation.

    Args:
        img: Image indexed as ``img[row, column]``.
        scaleFactor: Divisor applied to both width and height.

    Returns:
        The image returned by ``cv2.resize`` with each dimension truncated
        to an integer after scaling.
    """
    shrink = 1 / scaleFactor
    new_width = int(img.shape[1] * shrink)
    new_height = int(img.shape[0] * shrink)
    return cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)
# --- Build the training set -------------------------------------------------
# Image folders and the matching PascalVOC annotation folders.
path1 = 'E:\\data\\pos\\'
path2 = 'E:\\data\\neg\\'
path_pos = 'E:\\data\\post\\train-PascalVOC-export\\Annotations\\'
path_neg = 'E:\\data\\negtive\\negtive-PascalVOC-export\\Annotations\\'

# Positive samples: annotated crops from the 333 positive images.
pos_list = extract_images(path_pos, load_images(path1, 333), 333, wsize=(80, 80))

# Negative samples: annotated crops from the 193 negative images, followed by
# random crops taken from the same folder.
neg_list = extract_images(path_neg, load_images(path2, 193), 193, wsize=(80, 80))
neg_list = extract_neg_img(path2, neg_list, wsize=(80, 80))

# HOG features and labels, positives (+1) first, then negatives (-1).
gradient_list = []
computeHOGs(pos_list, gradient_list)
labels = [+1] * len(pos_list)
computeHOGs(neg_list, gradient_list)
labels += [-1] * len(neg_list)

# --- Train the SVM ----------------------------------------------------------
svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setGamma(0.001)
svm.setC(30)
svm.setKernel(cv2.ml.SVM_RBF)
svm.train(np.array(gradient_list), cv2.ml.ROW_SAMPLE, np.array(labels))
svm.save("svm.xml")
2 test
# Load the model that the training step saved.
svm2 = cv2.ml.SVM_load("svm.xml")

# Grab a single frame (number 4203) from the test video.
videoCapture = cv2.VideoCapture('52.avi')
videoCapture.set(cv2.CAP_PROP_POS_FRAMES, 4203)
success, img = videoCapture.read()
videoCapture.release()
if not success:
    # Without this check a missing video only fails later, inside the
    # sliding-window loop, with an unrelated-looking error.
    raise RuntimeError("could not read frame 4203 from '52.avi'")

rectangles = []
counter = 0
scale = 1
w, h = 80, 80
font = cv2.FONT_HERSHEY_PLAIN
hog = cv2.HOGDescriptor((80, 80), (40, 40), (8, 8), (8, 8), 9)

# Slide an 80x80 window over the frame with a stride of 10 pixels.
for (x, y, roi) in sliding_window(img, 10, (w, h)):
    if roi.shape[1] != w or roi.shape[0] != h:
        # Window runs past the image border and is truncated: skip it.
        continue
    # Same preprocessing as during training.
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    gray = gamma_trans(gray, 0.8)
    test_gradient = hog.compute(gray)
    sample = np.array([test_gradient])
    # Use the loaded model (svm2); the original predicted with `svm`, which
    # only exists if the training code ran in the same session.
    _, result = svm2.predict(sample)
    # Second call returns the raw decision value; more negative means a
    # more confident positive detection.
    _, res = svm2.predict(sample, flags=cv2.ml.STAT_MODEL_RAW_OUTPUT)
    score = res[0][0]
    if result[0][0] == 1 and score < -1:
        print(score)
        rx, ry, rx2, ry2 = int(x * scale), int(y * scale), int((x + w) * scale), int((y + h) * scale)
        rectangles.append([rx, ry, rx2, ry2, score])
    # NOTE(review): the original indentation was lost; counter is assumed to
    # count every evaluated window - confirm.
    counter += 1
print(counter)

# Merge overlapping detections of the same head.
windows = np.array(rectangles)
boxes = nms(windows, 0.5)
print(len(boxes))

# Draw the surviving detections together with their scores.
for (x, y, x2, y2, score) in boxes:
    cv2.rectangle(img, (int(x), int(y)), (int(x2), int(y2)), (0, 255, 0), 1)
    cv2.putText(img, "%f" % score, (int(x), int(y)), font, 1, (0, 255, 0))

cv2.imshow("img", img)
# Block until a key is pressed, otherwise the window closes immediately.
cv2.waitKey(0)
cv2.destroyAllWindows()
3 非最大值抑制
滑动窗口的步长不能太大,因此相邻的几个窗口可能同时检测到人头,但其实是同一个人头。此时我们就根据 SVM 的预测分值,只保留置信度最高(即分值最负)的窗口作为检测结果。
import numpy as np
def area(box):
    """Return the area of ``box = (x1, y1, x2, y2, ...)``.

    Absolute differences are used, so the corner order does not matter.
    """
    width = abs(box[2] - box[0])
    height = abs(box[3] - box[1])
    return width * height
def overlaps(a, b, thresh=0.5):
    """Tell whether boxes *a* and *b* overlap by at least *thresh*.

    The overlap ratio is the intersection area divided by the area of the
    smaller box. For the 80x80 detection windows this equals the previous
    fixed denominator of 6400.

    Args:
        a: Box as ``(x1, y1, x2, y2, ...)`` with ``x1 <= x2`` and ``y1 <= y2``.
        b: Second box, same layout.
        thresh: Minimum overlap ratio that counts as overlapping.

    Returns:
        ``True`` if the ratio is at least *thresh*, otherwise ``False``.
        Boxes that do not intersect (or only touch) never overlap.
    """
    inter_w = min(a[2], b[2]) - max(a[0], b[0])
    inter_h = min(a[3], b[3]) - max(a[1], b[1])
    # A non-positive extent means the boxes are disjoint. Taking abs() of
    # these values, as before, reported far-apart boxes as overlapping.
    if inter_w <= 0 or inter_h <= 0:
        return False
    area_a = abs(a[2] - a[0]) * abs(a[3] - a[1])
    area_b = abs(b[2] - b[0]) * abs(b[3] - b[1])
    # Both areas are positive here because the intersection is non-empty.
    smaller = float(min(area_a, area_b))
    return bool(float(inter_w * inter_h) / smaller >= thresh)
# Malisiewicz et al.
def non_max_suppression_fast(boxes, overlapThresh=0.5):
    """Suppress detections that overlap a detection with a lower score.

    Boxes are visited in ascending order of their score (column 4). Each
    box that is still alive removes every later box that ``overlaps`` it
    by at least *overlapThresh*, so windows overlapping by more than the
    threshold are treated as the same head.
    NOTE(review): ascending order keeps the lowest score of each cluster;
    this presumably matches a detector whose raw SVM scores are more
    negative for more confident hits - confirm against the caller.

    Args:
        boxes: 2-D array whose rows are ``(x1, y1, x2, y2, score)``.
        overlapThresh: Overlap ratio passed on to ``overlaps``.

    Returns:
        An empty list if *boxes* is empty, otherwise an array with the
        surviving rows in their original order.
    """
    # If there are no boxes, return an empty list.
    if len(boxes) == 0:
        return []
    boxes = np.asarray(boxes)
    order = np.argsort(boxes[:, 4])
    suppressed = np.zeros(len(boxes), dtype=bool)
    for position, best in enumerate(order):
        if suppressed[best]:
            continue
        # Only boxes ranked after `best` can still be suppressed by it.
        for other in order[position + 1:]:
            if suppressed[other]:
                continue
            if overlaps(boxes[other], boxes[best], overlapThresh):
                suppressed[other] = True
    return boxes[~suppressed]
完整的代码链接:https://github.com/hunting777/hog-svm.git
我自己做了人头检测的数据集,若需要:https://zhuanlan.zhihu.com/p/114431564