如果是从github上直接下载的,在notebook文件夹下应该会有一个名字是data的子文件夹,里面已经有了相关的数据集,若没有,则可以在这里下载。
首先需要给压缩包解压。
# Where the positive (pedestrian) dataset lives: the archive sits next to
# the folder it unpacks into and shares its base name.
datadir = "data/chapter6"
dataset = "pedestrians128x64"
extractdir = "/".join((datadir, dataset))
datafile = extractdir + ".tar.gz"
def extract_tar(datafile, extractdir):
    """Extract every member of a tar archive into a directory.

    Args:
        datafile: Path of the tar archive to open.
        extractdir: Directory the archive members are extracted into.

    Raises:
        ImportError: If the ``tarfile`` module cannot be imported.
    """
    try:
        import tarfile
    except ImportError:
        raise ImportError("You do not have tarfile installed. "
                          "Try unzipping the file outside of Python.")
    # NOTE(review): extractall trusts the member paths stored in the archive;
    # only use this on archives from a trusted source.
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(datafile) as tar:
        tar.extractall(path=extractdir)
    print("%s successfully extracted to %s" % (datafile, extractdir))
# Start extracting: unpack the pedestrian archive into the data directory.
extract_tar(datafile=datafile, extractdir=datadir)
数据集包含924张64x128大小的彩色图像,均为.ppm格式,可以用Photoshop之类的第三方图像软件直接查看。这里用程序读取其中5张图片作为示例:
import cv2
import matplotlib.pyplot as plt
# Preview five of the pedestrian images side by side.
plt.figure(figsize=(10, 6))
for i in range(5):
    filename = "%s/per0010%d.ppm" % (extractdir, i)
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # read; skip it rather than crash inside cvtColor.
        print('Could not find image %s' % filename)
        continue
    plt.subplot(1, 5, i + 1)
    # Convert BGR to RGB before handing the image to matplotlib.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
plt.show()
HOG的核心思想就是图片中出现的物体局部的形状和特征可以被图片中检测到的边界方向的分布所描述。
我们可以通过 cv2.HOGDescriptor 使用opencv中的HOG描述符,并设置检测窗口的尺寸为(48,96),每个子块的大小为(16,16),子块的滑动步长为(8,8),最小单元大小为(8,8),方向直方图的bin数为9。
# HOG layout: 9 orientation bins per 8x8 cell, 16x16 blocks moved in 8x8
# steps, all inside a 48x96 detection window.
num_bins = 9
cell_size = (8, 8)
block_size = (16, 16)
block_stride = (8, 8)
win_size = (48, 96)
hog = cv2.HOGDescriptor(
    win_size, block_size, block_stride, cell_size, num_bins
)
import numpy as np
import random
# Positive set: HOG features of 400 randomly picked pedestrian images
# (indices drawn from 0..899); images that cannot be loaded are skipped.
random.seed(42)  # fixed seed so the same indices are drawn on every run
X_pos = []
for idx in random.sample(range(900), 400):
    path = "%s/per%05d.ppm" % (extractdir, idx)
    image = cv2.imread(path)
    if image is not None:
        X_pos.append(hog.compute(image, (64, 64)))
    else:
        print('Could not find image %s' % path)

# One feature row per loaded image, every row labelled +1.
X_pos = np.array(X_pos, dtype=np.float32)
y_pos = np.ones(X_pos.shape[0], dtype=np.int32)
print(X_pos.shape, y_pos.shape)
上面随机抽取了400个图片编号(无法读取的图片会被跳过,最终得到399个样本),每个样本包含1980个HOG特征值。
刚才的操作数据均为正样本,即图片中只包含行人。负样本仍然在data文件夹里,若没有,点击这里下载。
# Negative (non-pedestrian) dataset: same layout as the positive one, and
# unpacked into the same data directory.
negset = "pedestrians_neg"
negdir = "/".join((datadir, negset))
negfile = negdir + ".tar.gz"
extract_tar(negfile, datadir)
这些负例图片都是256x256大小的,而我们需要保证负例和正例的64x128尺寸一致。下面的代码先把每张图片缩放到512x512,再从中随机裁剪出5个64x128的区域作为负样本。
import os
# Negative set: from every image in negdir, take five random 64x128 crops
# (width x height) and compute their HOG features.
hroi = 128  # crop height, matching the positive samples
wroi = 64   # crop width, matching the positive samples
X_neg = []
# Use a dedicated loop name so the archive path stored in `negfile` is not
# overwritten by directory entries.
for negname in os.listdir(negdir):
    filename = '%s/%s' % (negdir, negname)
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread returns None for entries it cannot decode; skip them
        # instead of crashing inside cv2.resize.
        print('Could not read image %s' % filename)
        continue
    img = cv2.resize(img, (512, 512))
    for _ in range(5):
        # Random top-left corner chosen so the crop stays inside the image.
        rand_y = random.randint(0, img.shape[0] - hroi)
        rand_x = random.randint(0, img.shape[1] - wroi)
        roi = img[rand_y:rand_y + hroi, rand_x:rand_x + wroi, :]
        X_neg.append(hog.compute(roi, (64, 64)))
# One feature row per crop, every row labelled -1.
X_neg = np.array(X_neg, dtype=np.float32)
y_neg = -np.ones(X_neg.shape[0], dtype=np.int32)
print(X_neg.shape, y_neg.shape)
最终我们还是要用sklearn工具包中的 train_test_split 函数分离数据集。
from sklearn import model_selection as ms

# Stack positives on top of negatives, then hold out 20% of the samples
# for testing with a fixed random state.
X = np.concatenate([X_pos, X_neg])
y = np.concatenate([y_pos, y_neg])
split = ms.train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split
我们现在将训练SVM写成一个函数方便调用。
def train_svm(X_train, y_train):
    """Create an OpenCV SVM and train it on row-wise samples.

    No SVM parameters are set here, so the model uses whatever
    ``cv2.ml.SVM_create`` provides.

    Args:
        X_train: Training samples, one sample per row.
        y_train: Label for each row of ``X_train``.

    Returns:
        The trained ``cv2.ml`` SVM object.
    """
    model = cv2.ml.SVM_create()
    model.train(X_train, cv2.ml.ROW_SAMPLE, y_train)
    return model
def score_svm(svm, X, y):
    """Return the accuracy of ``svm`` on samples ``X`` against labels ``y``.

    Args:
        svm: Trained model whose ``predict(X)`` returns a pair with the
            predicted labels as its second element.
        X: Samples to classify.
        y: Ground-truth labels for ``X``.

    Returns:
        The value of ``sklearn.metrics.accuracy_score`` for the predictions.
    """
    from sklearn.metrics import accuracy_score
    predicted = svm.predict(X)[1]
    return accuracy_score(y, predicted)
# Start training: fit the SVM on the training split.
svm = train_svm(X_train, y_train)
# Check accuracy: first on the training split, then on the test split.
for features, labels in ((X_train, y_train), (X_test, y_test)):
    print(score_svm(svm, features, labels))
可以看见训练的准确率为100%,测试的准确率只有64.6%,这是典型的过拟合现象。
Bootstrapping的核心思想就是多次训练,将分类器分错的样本重新添加到训练样本中进行训练,以此来改善模型的能力。
# Bootstrapping: retrain up to three times, each round appending the test
# samples that were wrongly predicted as pedestrians to the training set.
# NOTE(review): the appended samples are taken from X_test, so later test
# scores are measured partly on data the model was trained on; confirm this
# is intended.
score_train, score_test = [], []
for _ in range(3):
    svm = train_svm(X_train, y_train)
    score_train.append(score_svm(svm, X_train, y_train))
    score_test.append(score_svm(svm, X_test, y_test))

    # False positives: true label -1 but predicted +1.
    y_pred = svm.predict(X_test)[1]
    false_pos = (y_test.ravel() == -1) & (y_pred.ravel() == 1)
    if not false_pos.any():
        print('done')
        break

    X_train = np.concatenate((X_train, X_test[false_pos, :]), axis=0)
    y_train = np.concatenate((y_train, y_test[false_pos]), axis=0)
print(score_train)
print(score_test)
img_test = cv2.imread('data/chapter6/pedestrian_test.jpg')
if img_test is None:
    # cv2.imread returns None on failure; stop here with a clear message
    # instead of failing later on img_test.shape.
    raise IOError('Could not read image data/chapter6/pedestrian_test.jpg')
# Mark pedestrians: slide a wroi x hroi window over the image in steps of
# `stride` pixels and classify the HOG features of every window.
stride = 16
found = []
# The range limits keep every window fully inside the image.
for ystart in range(0, img_test.shape[0] - hroi + 1, stride):
    for xstart in range(0, img_test.shape[1] - wroi + 1, stride):
        roi = img_test[ystart:ystart + hroi, xstart:xstart + wroi, :]
        # Wrap in a list so the SVM receives a single-sample batch.
        feat = np.array([hog.compute(roi, (64, 64))])
        _, ypred = svm.predict(feat)
        if np.allclose(ypred, 1):
            # Stored as (x, y, width, height) so the boxes can be drawn by
            # the same code that handles detectMultiScale results.
            found.append((xstart, ystart, wroi, hroi))
# Rebuild the HOG descriptor and give it a detector vector made from the
# trained SVM: the first support-vector row followed by rho.
# NOTE(review): only row 0 of the support vectors is used, which presumably
# assumes a linear kernel; train_svm does not set one, so verify.
# NOTE(review): OpenCV's train_HOG sample appends -rho; confirm the sign.
hog = cv2.HOGDescriptor(
    win_size, block_size, block_stride, cell_size, num_bins
)
rho, _, _ = svm.getDecisionFunction(0)
sv = svm.getSupportVectors()
detector = np.append(sv[0, :].ravel(), rho)
hog.setSVMDetector(detector)

# The boxes in `found` come from a second descriptor that uses OpenCV's
# default people detector, not from the custom `hog` configured above.
hogdef = cv2.HOGDescriptor()
default_detector = cv2.HOGDescriptor_getDefaultPeopleDetector()
hogdef.setSVMDetector(default_detector)
found, _ = hogdef.detectMultiScale(img_test)
from matplotlib import patches

# Show the test image with a yellow outline around every entry of `found`,
# then save the figure to disk.
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(111)
ax.imshow(cv2.cvtColor(img_test, cv2.COLOR_BGR2RGB))
for left, top, width, height in found:
    frame = patches.Rectangle(
        (left, top), width, height, color='y', linewidth=3, fill=False
    )
    ax.add_patch(frame)
plt.savefig('detected.png')