HOG:方向梯度直方图(Histogram of Oriented Gradients)的缩写,涉及到Window大小,Block大小,Block的滑动步长,Cell大小和Bin个数(通常为9,即把0°~180°的无符号梯度方向等分9份),目的就是获取到Window中所有滑动后的Block内各Cell的梯度直方图,组成特征向量。维数:N = ((W - wb)/stride + 1) * ((H - hb)/stride + 1) * bins * n,其中W为Window的宽,H为Window的高,wb和hb为Block的宽与高,stride为Block的滑动步长,bins为每个Cell直方图的方向区间数,n为一个Block中包含的Cell的个数。
INRIA Person Dataset: 该数据库是目前使用最多的静态行人检测数据库,提供原始图片及相应的标注文件。训练集有正样本614张(包含2416个行人),负样本1218张;测试集有正样本288张(包含1126个行人),负样本453张。图片中人体大部分为站立姿势且高度大于100个像素,部分标注可能不正确。图片主要来源于GRAZ-01、个人照片及Google,因此图片的清晰度较高。在XP操作系统下部分训练或者测试图片无法看清楚,但可用OpenCV正常读取和显示。
1. 提取数据集的HOG特征:样本很重要,要贴合项目实际环境去获取图片数据集,不要以为随便找些图片就可以训练了。
2. 对正负样本进行训练得到模型
3. 使用训练好的模型生成检测子
4. 使用检测子对测试负样本集进行识别,找到识别错误的hard example:hard example是指利用第一次训练的分类器在负样本原图(肯定没有人体)上进行行人检测时所有检测到的矩形框,这些矩形框区域很明显都是误报,把这些误报的矩形框保存为图片,加入到初始的负样本集合中,重新进行SVM的训练,可显著减少误报。这种方法叫做自举法(Bootstrap),自举法首先使用初始负样本集来训练一个模型,然后收集被这个初始模型错误分类的负样本来形成一个负样本难例集。用此负样本难例集训练新的模型,此过程可以重复多次。
5. 提取hard example的HOG特征,结合第一步所得到的特征一起重新训练出模型。
6. 识别:识别有两种方式,如果仅仅使用线性核的话,只需要Hog类自带的setSVMDetector和detect(detectMultiScale),如果使用RBF,就需要使用SVM类的predict。但predict是判断“是与不是”的,如果要得到检测区域的话,使用Hog类的setSVMDetector和detect(detectMultiScale)。
7. 非极大抑制优化重叠检测区域
import cv2
import numpy as np
import random
def loadImageList(dirName, fileListPath):
    """Load every image named in a list file located under ``dirName``.

    Each non-blank line of the list file is a '/'-separated relative path.
    Its first path component is dropped and the remainder is resolved
    against ``dirName`` (``a/b/c.png`` -> ``<dirName>/b/c.png``).

    Args:
        dirName: Directory that holds the list file and the image folders.
        fileListPath: Name of the list file, relative to ``dirName``.

    Returns:
        A list of the decoded images, in list-file order. Entries that
        ``cv2.imread`` cannot decode are skipped.

    Raises:
        IndexError: If a non-blank line contains no '/' separator.
    """
    imageList = []
    # The context manager closes the list file even if reading fails.
    with open(dirName + r'/' + fileListPath) as listFile:
        for line in listFile:
            entry = line.strip()
            if not entry:
                continue  # tolerate blank lines such as a trailing newline
            imagePath = dirName + r'/' + entry.split('/', 1)[1]
            image = cv2.imread(imagePath)
            # imread reports failure by returning None instead of raising;
            # a None entry would only crash later when it is sliced.
            if image is not None:
                imageList.append(image)
    return imageList
# Positive samples: crop the 128x64 (rows x columns) region whose top-left
# corner is at offset (16, 16) of every image.
def getPosSample(imageList):
    """Crop the fixed sample window out of each positive image.

    Args:
        imageList: Images that support 2-D slicing (``img[rows, cols]``),
            e.g. NumPy arrays.

    Returns:
        A list with one crop per input image, covering rows 16..143 and
        columns 16..79. Slicing truncates silently, so an image smaller
        than that yields a smaller crop.
    """
    return [image[16:16 + 128, 16:16 + 64] for image in imageList]
# Negative samples: from images that contain no pedestrians, randomly crop
# 10 regions of 128x64 (rows x columns) each.
def getNegSample(imageList):
    """Cut ten random 128x64 windows out of every person-free image.

    Args:
        imageList: Images as NumPy arrays whose first two axes are rows and
            columns.

    Returns:
        A list of crops, ten per usable image, in input order. Images
        smaller than 128 rows or 64 columns are skipped, because they would
        produce negative offsets and therefore wrong-sized crops.
    """
    negList = []
    for image in imageList:
        rows, cols = image.shape[:2]
        maxY = rows - 128
        maxX = cols - 64
        if maxY < 0 or maxX < 0:
            continue  # image cannot hold a full window
        for _ in range(10):
            # Row offset is drawn before the column offset.
            y = int(random.random() * maxY)
            x = int(random.random() * maxX)
            negList.append(image[y:y + 128, x:x + 64])
    return negList
def getHOGList(imageList):
    """Compute one HOG feature vector per image.

    Args:
        imageList: BGR images (they are converted with
            ``cv2.COLOR_BGR2GRAY``). NOTE(review): a default-constructed
            ``cv2.HOGDescriptor`` is used, so the images are presumably
            expected to match its window size -- confirm against callers.

    Returns:
        A list with one flat (1-D) descriptor array per input image, in
        input order; an empty list for empty input.
    """
    if len(imageList) == 0:
        return []
    hog = cv2.HOGDescriptor()
    HOGList = []
    for image in imageList:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Flatten so that every sample becomes exactly one row of the
        # training matrix, whether compute() hands back a column vector or
        # an already flat array.
        HOGList.append(np.ravel(hog.compute(gray)))
    return HOGList
def getHOGDetector(svm):
    """Pack a trained SVM into a single coefficient column.

    Args:
        svm: Model object providing ``getSupportVectors()`` and
            ``getDecisionFunction(i)``; the latter must return a 3-tuple
            whose first element is ``rho``.

    Returns:
        The transposed support-vector matrix with one extra row appended
        that holds ``-rho``.
    """
    supportVectors = svm.getSupportVectors()
    rho, _alpha, _svIndex = svm.getDecisionFunction(0)
    weightColumn = np.transpose(supportVectors)
    biasRow = [[-rho]]
    return np.append(weightColumn, biasRow, 0)
# Collect hard examples (false positives found on person-free images).
def getHardExamples(negImageList, svm):
    """Run the trained detector on person-free images and keep every hit.

    Every rectangle reported on such an image is a false positive; each one
    is cropped and resized to 64x128 (width x height) so it can be added to
    the negative training set.

    Args:
        negImageList: Images that contain no pedestrians.
        svm: Trained SVM; it is converted with ``getHOGDetector`` and
            installed as the HOG descriptor's detector.

    Returns:
        A list of 64x128 crops, one per false detection; an empty list for
        empty input.
    """
    hardNegList = []
    if len(negImageList) == 0:
        return hardNegList
    hog = cv2.HOGDescriptor()
    # Without a detector the descriptor cannot score any window.
    hog.setSVMDetector(getHOGDetector(svm))
    for image in negImageList:
        rects, _weights = hog.detectMultiScale(
            image, winStride=(4, 4), padding=(8, 8), scale=1.05)
        rows, cols = image.shape[:2]
        for (x, y, w, h) in rects:
            # Clip to the image: a negative start index would wrap around
            # when slicing, and an empty crop cannot be resized.
            top, left = max(y, 0), max(x, 0)
            bottom, right = min(y + h, rows), min(x + w, cols)
            if bottom <= top or right <= left:
                continue
            hardExample = image[top:bottom, left:right]
            hardNegList.append(cv2.resize(hardExample, (64, 128)))
    return hardNegList
def fastNonMaxSuppression(boxes, sc, overlapThresh):
    """Greedy non-maximum suppression driven by detection scores.

    The highest-scoring remaining box is kept, and every other remaining
    box whose overlap with it exceeds ``overlapThresh`` is discarded; this
    repeats until no boxes remain. The overlap of a candidate is the
    intersection area divided by the candidate's own area (not IoU).

    Args:
        boxes: Array-like of shape (N, 4) with rows ``(x1, y1, x2, y2)``;
            corner coordinates are treated as inclusive.
        sc: Array-like with N scores, flat or as an (N, 1) column.
        overlapThresh: Overlap ratio above which a box is suppressed.

    Returns:
        ``[]`` when there are no boxes; otherwise an array holding the kept
        boxes in descending score order. Integer input is converted to
        float.
    """
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []
    boxes = np.asarray(boxes)
    # Integer boxes are converted to float because the overlap below is a
    # ratio; this also keeps unsigned coordinates from wrapping around in
    # the subtractions.
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")
    # Flatten so a column of scores sorts the same way as a flat array.
    scores = np.asarray(sc).reshape(-1)
    # indexes of the boxes that survive suppression
    pick = []
    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    # compute the area of every box and sort the indexes by ascending
    # score, so the best remaining box is always the last one
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(scores)
    while len(idxs) > 0:
        # keep the best remaining box
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        rest = idxs[:last]
        # intersection of the kept box with each remaining box: the largest
        # start coordinates and the smallest end coordinates
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        # width and height of the intersection (zero when disjoint)
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        # share of each remaining box that is covered by the kept box
        overlap = (w * h) / area[rest]
        # drop the kept box and everything it suppresses
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))
    # return only the boxes that were picked
    return boxes[pick]
# ---------------------------------------------------------------------------
# Script: assemble the training set, train the SVM, mine hard examples,
# retrain, then run the detector on a test image.
# ---------------------------------------------------------------------------
labels = []   # one response (+1 / -1) per row of hosList, in the same order
hosList = []  # HOG feature vectors of all training samples
tem = []      # unused; kept so the module still defines the same names

# Positive samples: load the pedestrian images, crop them and extract HOG.
posImageList = loadImageList(r"/home/ningshaohui/tfboy/INRIAPerson/train_64x128_H96", "pos.lst")
print("posImageList:", len(posImageList))
posList = getPosSample(posImageList)
print("posList", len(posList))
hosList = getHOGList(posList)
print("hosList", len(hosList))
labels.extend([+1] * len(posList))

# Negative samples: random crops from the person-free images.
negImageList = loadImageList(r"/home/ningshaohui/tfboy/INRIAPerson/train_64x128_H96", "neg.lst")
print("negImageList:", len(negImageList))
negList = getNegSample(negImageList)
print("negList", len(negList))
# The negative features must join the feature list, otherwise the number of
# samples no longer matches the number of labels.
hosList.extend(getHOGList(negList))
print("hosList", len(hosList))
labels.extend([-1] * len(negList))
print("labels", len(labels))

# All SVM features and labels are now assembled (hard examples excluded).
# libsvm-style parameter reference:
# -d degree: degree of the kernel function (polynomial kernel) (default 3)
# -g gamma: gamma of the kernel function (polynomial/rbf/sigmoid) (default 1/k)
# -r coef0: coef0 of the kernel function (polynomial/sigmoid) (default 0)
# -c cost: parameter C of C-SVC, epsilon-SVR and nu-SVR (default 1)
# -n nu: parameter nu of nu-SVC, one-class SVM and nu-SVR (default 0.5)
# -p p: epsilon in the loss function of epsilon-SVR (default 0.1)
# -m cachesize: cache memory size in MB (default 40)
# -e eps: tolerance of the termination criterion (default 0.001)
# -h shrinking: whether to use the shrinking heuristics, 0 or 1 (default 1)
# -wi weight: set parameter C of class i to weight*C (C-SVC) (default 1)
# -v n: n-fold cross-validation mode, n must be >= 2
svm = cv2.ml.SVM_create()
# Termination criteria: at most 1000 iterations or an accuracy of 1e-3.
criteria = (cv2.TERM_CRITERIA_MAX_ITER + cv2.TERM_CRITERIA_EPS, 1000, 1e-3)
svm.setTermCriteria(criteria)
# A linear kernel is required: only then can the model be collapsed into the
# single weight vector that HOGDescriptor.setSVMDetector expects.
svm.setKernel(cv2.ml.SVM_LINEAR)
svm.setP(0.1)  # epsilon in the loss function of EPS_SVR
svm.setC(0.01)  # soft-margin constant, value taken from the paper
svm.setType(cv2.ml.SVM_EPS_SVR)  # regression task; C_SVC / NU_SVR are alternatives
# Explicit dtypes: float32 samples and int32 responses, because the default
# int64 produced from Python ints is not accepted by cv2.
svm.train(np.array(hosList, dtype=np.float32), cv2.ml.ROW_SAMPLE,
          np.array(labels, dtype=np.int32))

# Mine hard examples with the initial model and add them as negatives.
hardNegList = getHardExamples(negImageList, svm)
hosList.extend(getHOGList(hardNegList))
print("hosList=====", len(hosList))
labels.extend([-1] * len(hardNegList))

# All SVM features and labels are now assembled (hard examples included).
# In practice, adding hard examples considerably improves detection precision.
# Retrain with the hard examples included.
svm.train(np.array(hosList, dtype=np.float32), cv2.ml.ROW_SAMPLE,
          np.array(labels, dtype=np.int32))

# Pedestrian detection. The model lives in this same process, so it is
# installed directly instead of being loaded from a file.
hog = cv2.HOGDescriptor()
hog.setSVMDetector(getHOGDetector(svm))
image = cv2.imread("1.jpg")
if image is None:
    # imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("cannot read test image '1.jpg'")
cv2.imshow("image", image)
rects, scores = hog.detectMultiScale(image, winStride=(4, 4), padding=(8, 8), scale=1.05)
# Convert (x, y, w, h) rectangles into (x1, y1, x2, y2) corner boxes.
rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
# Flatten the weights so both (N, 1) and (N,) layouts are handled.
sc = np.asarray(scores, dtype=float).reshape(-1)
pick = fastNonMaxSuppression(rects, sc, overlapThresh=0.3)
print('pick_len = ',len(pick))
for (x, y, xx, yy) in pick:
    print(x, y, xx, yy)
    cv2.rectangle(image, (int(x), int(y)), (int(xx), int(yy)), (0, 0, 255), 2)
cv2.imshow('a', image)
# HighGUI windows are only drawn while waitKey runs; block until a key press.
cv2.waitKey(0)