opencv-python hands-on tutorial: object detection with the classic HOG + SVM approach

Preface

Many OpenCV beginners feel lost about how to pair it with Python for simple image-processing work. Here I share a small project I once worked on: head detection, one part of a passenger-flow counter for bus boarding and alighting.

Plenty of blog posts already explain the theory behind HOG features and SVMs very thoroughly, so I will not repeat it here. Instead I walk through the code I wrote, to help beginners quickly get familiar with the common OpenCV functions and see how to build and run a simple object detector in Python.

If you are already experienced, you can stop reading here.

Before reading the code, I recommend clicking through to the post where I summarize the algorithm behind this code.

Implementation

1 train

import cv2
import numpy as np
import random
import xml.dom.minidom
from non_maximum import non_max_suppression_fast as nms
def gamma_trans(img, gamma):   # gamma correction
    gamma_table = [np.power(x/255.0, gamma)*255.0 for x in range(256)]
    gamma_table = np.round(np.array(gamma_table)).astype(np.uint8)
    return cv2.LUT(img, gamma_table)
def load_images(dirname, size):    # load images
    img_list = []
    for i in range(size):
        path = dirname + str(i+1) + '.jpg'   # images are named 1.jpg, 2.jpg, ...
        img = cv2.imread(path)          # cv2.imread returns a 3-channel BGR array
        img_list.append(img)
    return img_list
def extract_images(path, img_list, size, wsize=(80,80)):     # extract the annotated head regions as positive samples (I labelled the heads with vott and exported PascalVOC xml files; this function reads each head region and collects them into a list)
    extract_img = []
    for i in range(size):
        path1 = path + str(i+1) + '.xml'
        doc = xml.dom.minidom.parse(path1)
        root = doc.documentElement      # reading an xml file in python
        xminnode = root.getElementsByTagName("xmin")
        xmaxnode = root.getElementsByTagName("xmax")
        ymaxnode = root.getElementsByTagName("ymax")
        yminnode = root.getElementsByTagName("ymin")
        xmin = int(float(xminnode[0].childNodes[0].nodeValue))
        xmax = int(float(xmaxnode[0].childNodes[0].nodeValue))
        ymin = int(float(yminnode[0].childNodes[0].nodeValue))
        ymax = int(float(ymaxnode[0].childNodes[0].nodeValue))
        roi = img_list[i][ymin:ymin+wsize[1], xmin:xmin+wsize[0]]    # crop a fixed-size window anchored at the annotation's top-left corner
        if roi.shape[1] != wsize[0] or roi.shape[0] != wsize[1]:     # skip crops truncated by the image border
            continue
        extract_img.append(roi)
    return extract_img
def extract_neg_img(dirname, extract_neg, wsize=(80,80)):   # randomly crop negative samples from images that contain no heads
    x = 10                                                   # number of random crops per image
    for i in range(15):
        path = dirname + str(i+1) + '.jpg'
        img = cv2.imread(path)
        for j in range(x):
            xmin = random.randint(0, img.shape[1] - wsize[0])
            ymin = random.randint(0, img.shape[0] - wsize[1])
            xmax = xmin + wsize[0]
            ymax = ymin + wsize[1]
            roi = img[ymin:ymax, xmin:xmax]      # numpy indexing is [row, col] = [y, x]
            extract_neg.append(roi)
    return extract_neg
def computeHOGs(img_list, gradient_list, wsize=(80,80)):     # compute the HOG feature of every ROI
    hog = cv2.HOGDescriptor((80,80), (40,40), (8,8), (8,8), 9)   # winSize, blockSize, blockStride, cellSize, nbins
    for i in range(len(img_list)):
        img = img_list[i]
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)    # convert to grayscale
        gray = cv2.equalizeHist(gray)            # histogram equalization, so overly dark or bright images do not hurt the features
        gray = gamma_trans(gray, 0.8)
        gradient_list.append(hog.compute(gray))
    return gradient_list
def sliding_window(image, stepSize, windowSize):      # sliding window: yields the candidate regions scanned at detection time
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])   # near the border numpy simply truncates the slice; undersized windows are filtered out later
def resize(img, scaleFactor):         # rescale the image so every window fed to the HOG descriptor has the same size; otherwise the feature vectors have different lengths and cannot be classified
    return cv2.resize(img, (int(img.shape[1] * (1 / scaleFactor)), int(img.shape[0] * (1 / scaleFactor))), interpolation=cv2.INTER_AREA)



#load the samples and compute their HOG features
neg_list = []
pos_list = []
gradient_list = []
labels = []
path1 = 'E:\\data\\pos\\'
path2 = 'E:\\data\\neg\\'
path_pos = 'E:\\data\\post\\train-PascalVOC-export\\Annotations\\'
path_neg = 'E:\\data\\negtive\\negtive-PascalVOC-export\\Annotations\\'
pos_list = load_images(path1,333)
pos_list = extract_images(path_pos,pos_list,333,wsize=(80,80))
neg_list = load_images(path2,193)
neg_list = extract_images(path_neg,neg_list,193,wsize=(80,80))
neg_list = extract_neg_img(path2,neg_list,wsize=(80,80))
computeHOGs(pos_list,gradient_list)
for _ in range(len(pos_list)):
    labels.append(+1)
computeHOGs(neg_list,gradient_list)
for _ in range(len(neg_list)):
    labels.append(-1)
#train the SVM
svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setGamma(0.001)
svm.setC(30)
svm.setKernel(cv2.ml.SVM_RBF)
svm.train(np.array(gradient_list), cv2.ml.ROW_SAMPLE, np.array(labels))
svm.save("svm.xml")

2 test

import cv2
import numpy as np
from non_maximum import non_max_suppression_fast as nms
# gamma_trans and sliding_window are the helper functions defined in the training script above

svm2 = cv2.ml.SVM_load("svm.xml")       # load the SVM model saved during training
videoCapture = cv2.VideoCapture('52.avi')    # OpenCV video capture object, used to read frames from the video
videoCapture.set(cv2.CAP_PROP_POS_FRAMES, 4203)   # jump to frame 4203, so the next read() returns that frame
success,img = videoCapture.read()
rectangles = []
counter = 0
scale = 1
w,h = 80,80
font = cv2.FONT_HERSHEY_PLAIN
hog = cv2.HOGDescriptor((80,80), (40,40), (8,8), (8,8), 9)   # must match the HOG parameters used during training
for (x, y, roi) in sliding_window(img, 10, (80, 80)):   # slide an 80x80 window over the frame with a step of 10 pixels
    if roi.shape[1] != w or roi.shape[0] != h:         # skip windows truncated by the image border
        continue
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)     # apply exactly the same preprocessing as during training
    gray = cv2.equalizeHist(gray)
    gray = gamma_trans(gray, 0.8)
    test_gradient = hog.compute(gray)
    _, result = svm2.predict(np.array([test_gradient]))     # class prediction (+1 / -1)
    a, res = svm2.predict(np.array([test_gradient]), flags=cv2.ml.STAT_MODEL_RAW_OUTPUT)   # raw decision value used as a confidence score: the more negative, the more confident the detection
    score = res[0][0]
    if result[0][0] == 1:
        if score < -1:
            print(score)
            rx, ry, rx2, ry2 = int(x * scale), int(y * scale), int((x+w) * scale), int((y+h) * scale)
            rectangles.append([rx, ry, rx2, ry2, score])
    counter += 1 
print(counter)
windows = np.array(rectangles)
boxes = nms(windows,0.5)
print(len(boxes))
for (x, y, x2, y2, score) in boxes:  # draw the detected heads
    cv2.rectangle(img, (int(x), int(y)), (int(x2), int(y2)), (0, 255, 0), 1)
    cv2.putText(img, "%f" % score, (int(x), int(y)), font, 1, (0, 255, 0))
cv2.imshow("img", img)    # show the result; almost always paired with waitKey, otherwise the window flashes and closes immediately
cv2.waitKey(0)
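
The test script above only scores a single frame. If you want to scan a stretch of the video, a frame loop built around the same pipeline might look like the sketch below; the 100-frame count and the 1 ms waitKey delay are arbitrary choices of mine, not from the original code:

videoCapture.set(cv2.CAP_PROP_POS_FRAMES, 4203)   # start from the same frame as above
for _ in range(100):                              # number of frames to scan is arbitrary here
    success, frame = videoCapture.read()
    if not success:                               # end of video
        break
    # ... run the same sliding-window + SVM + NMS steps on `frame` here ...
    cv2.imshow("img", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):         # press q to stop early
        break
videoCapture.release()
cv2.destroyAllWindows()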

3 Non-maximum suppression
Because the sliding-window step size cannot be too large, several neighboring windows may fire on the same head. Non-maximum suppression merges them: among overlapping boxes we keep only the most confident one (with this code's sign convention, the box with the most negative raw SVM output).

import numpy as np
def area(box):
  return (abs(box[2] - box[0])) * (abs(box[3] - box[1]))

def overlaps(a, b, thresh=0.5):
  x1 = np.maximum(a[0], b[0])
  x2 = np.minimum(a[2], b[2])
  y1 = np.maximum(a[1], b[1])
  y2 = np.minimum(a[3], b[3])
  if x2 <= x1 or y2 <= y1:       # the boxes do not overlap at all
    return False
  intersect = float(area([x1, y1, x2, y2]))
  return intersect / 6400 >= thresh    # 6400 = 80*80, the fixed window area

# Malisiewicz et al.
def non_max_suppression_fast(boxes, overlapThresh=0.5):  # if two boxes overlap by more than 50%, assume they cover the same head and suppress the weaker one
  # if there are no boxes, return an empty list
  if len(boxes) == 0:
    return []

  scores = boxes[:,4]
  score_idx = np.argsort(scores)   # indices that sort the scores in ascending order (most negative, i.e. most confident, first)
  to_delete = []
  while len(score_idx) > 0:
    box = score_idx[0]
    for s in score_idx:
      if s == score_idx[0]:
        continue
      if (overlaps(boxes[s], boxes[box], overlapThresh)):
        to_delete.append(s)
        a = np.where(score_idx == s)    # np.where and np.delete do the index bookkeeping here; worth looking up if you are new to numpy
        score_idx = np.delete(score_idx, a)
    score_idx = np.delete(score_idx, 0)
  boxes = np.delete(boxes,to_delete,axis=0)
  return boxes
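
To see what the function does in isolation, here is a tiny made-up example; the box coordinates and scores below are invented purely for illustration (each box is 80x80, matching the fixed window area used in overlaps):

import numpy as np
from non_maximum import non_max_suppression_fast as nms

boxes = np.array([
    [ 10,  10,  90,  90, -2.5],   # overlaps heavily with the next box; more confident (more negative), so it is kept
    [ 14,  12,  94,  92, -1.2],   # suppressed in favour of the box above
    [200, 100, 280, 180, -1.8],   # far from the others, kept as a separate detection
])
print(nms(boxes, 0.5))   # prints the first and the third box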

Additional notes

Full code: https://github.com/hunting777/hog-svm.git
I also built a head-detection dataset myself; if you need it, see: https://zhuanlan.zhihu.com/p/114431564
