为了创建一个目标识别系统,需要从每张图像中提取特征向量。每张图像需要有一个识别标志,以用于匹配。
我们用视觉码本的概念来创建图像识别标志。码本实际上就是一个由训练数据集构建的字典,用于给出图像的描述:我们用向量量化方法对大量特征点进行聚类并得出中心点,这些中心点将作为视觉码本的元素。
训练数据集
训练数据集包含3类实例,每一类包含20幅图像,可以在 http://www.vision.caltech.edu/html-files/archive.html 下载。
处理加载数据集:
def load_training_data(input_folder):
    """Collect the labelled ``.jpg`` images found under ``input_folder``.

    The folder is walked recursively and every file whose name ends with
    ``.jpg`` produces one record. The label of a record is the name of the
    directory that directly contains the image.

    Args:
        input_folder: Root directory of the training set, e.g.
            ``'training_images/'`` with one sub-folder per class.

    Returns:
        A list of dicts, each with the keys ``'object_class'`` (label) and
        ``'image_path'`` (path using forward slashes), ordered by directory
        name and then by file name.

    Raises:
        IOError: If ``input_folder`` is not an existing directory.
    """
    if not os.path.isdir(input_folder):
        raise IOError("The folder " + input_folder + " doesn't exist")

    training_data = []
    for root, dirs, files in os.walk(input_folder):
        # os.walk yields entries in a file-system dependent order. Sorting
        # the directory list in place (which also steers the traversal) and
        # the file names makes the result reproducible across runs/machines.
        dirs.sort()
        for filename in sorted(files):
            if not filename.endswith('.jpg'):
                continue
            # On Windows the joined path looks like
            # 'training_images/airplanes\\0001.jpg'; normalise the separators
            # so the label can be taken from the path components.
            filepath = os.path.join(root, filename).replace('\\', '/')
            # e.g. 'training_images/airplanes/0001.jpg' -> 'airplanes'
            object_class = filepath.split('/')[-2]
            training_data.append({'object_class': object_class,
                                  'image_path': filepath})
    return training_data
提取图片的特征:
class FeatureBuilder(object):
    """Extract local image descriptors and build the visual codebook.

    Key points are located with the Star detector and then described with
    SIFT at those locations.
    """

    def extract_features(self, img):
        """Return the SIFT descriptors computed at the Star key points.

        Args:
            img: Image to analyse, as passed on to ``StarFeatureDetector``
                and ``compute_sift_features``.

        Returns:
            The descriptor value returned by ``compute_sift_features``
            (second element of its result). NOTE(review): this is presumably
            ``None`` when OpenCV finds no key points; callers should check.
        """
        # Locate the key points with the Star detector.
        keypoints = StarFeatureDetector().detect(img)
        # Describe those key points with SIFT; only the descriptors are used.
        _, feature_vectors = compute_sift_features(img, keypoints)
        return feature_vectors

    def get_codewords(self, input_map, scaling_size, max_samples=12):
        """Cluster descriptors sampled from the training set into a codebook.

        At most ``max_samples`` usable images are taken from each class; the
        remaining images of that class are skipped. Images that cannot be
        read or that yield no descriptors are skipped and do not count
        towards the quota.

        Args:
            input_map: Iterable of dicts with the keys ``'object_class'`` and
                ``'image_path'``, e.g. ``{'image_path':
                'training_images/airplanes/0001.jpg', 'object_class':
                'airplanes'}``. Entries of one class are expected to be
                contiguous.
            scaling_size: Target size forwarded to ``resize_image``.
            max_samples: Maximum number of images used per class.

        Returns:
            The ``(kmeans, centroids)`` pair returned by
            ``BagOfWords().cluster``.
        """
        # Descriptors of all sampled images, later fed to the clustering.
        keypoints_all = []
        count = 0
        cur_class = None

        for item in input_map:
            object_class = item['object_class']

            # Restart the quota whenever a new class begins. Resetting only
            # after the quota was reached would let a class with fewer than
            # max_samples images consume part of the next class's quota.
            if object_class != cur_class:
                cur_class = object_class
                count = 0

            # This class already contributed enough images.
            if count >= max_samples:
                continue

            # cv2.imread returns None instead of raising on failure.
            img = cv2.imread(item['image_path'])
            if img is None:
                print("Skipping unreadable image", item['image_path'])
                continue
            img = resize_image(img, scaling_size)

            feature_vectors = self.extract_features(img)
            if feature_vectors is None or len(feature_vectors) == 0:
                print("No features found in", item['image_path'])
                continue

            keypoints_all.extend(feature_vectors)
            count += 1
            if count == max_samples:
                print("Built centroids for", object_class)

        # Vector-quantize all collected descriptors.
        kmeans, centroids = BagOfWords().cluster(keypoints_all)
        return kmeans, centroids
定义一个类来处理词袋模型和向量量化
class BagOfWords(object):
    """Bag-of-visual-words model based on k-means vector quantization."""

    def __init__(self, num_clusters=32):
        """Store the clustering configuration.

        Args:
            num_clusters: Number of clusters, i.e. codebook entries and
                length of the histograms built by ``construct_feature``.
        """
        # Not read anywhere in this class; presumably the SIFT descriptor
        # length. Kept so existing users of the attribute keep working.
        self.num_dims = 128
        self.num_clusters = num_clusters
        self.num_retries = 10

    def cluster(self, datapoints):
        """Quantize ``datapoints`` with k-means.

        Args:
            datapoints: Collection of descriptor vectors to cluster.

        Returns:
            A ``(kmeans, centroids)`` tuple: the fitted ``KMeans`` estimator
            and its ``cluster_centers_``.
        """
        kmeans = KMeans(n_clusters=self.num_clusters,
                        n_init=max(self.num_retries, 1),
                        max_iter=10, tol=1.0)
        # Fit and read back the cluster centres.
        res = kmeans.fit(datapoints)
        centroids = res.cluster_centers_
        return kmeans, centroids

    def normalize(self, input_data):
        """Scale ``input_data`` so that its entries sum to one.

        Args:
            input_data: Numeric array.

        Returns:
            ``input_data / sum`` when the sum is positive, otherwise
            ``input_data`` unchanged (avoids a division by zero).
        """
        sum_input = np.sum(input_data)
        if sum_input > 0:
            return input_data / sum_input
        else:
            return input_data

    def construct_feature(self, img, kmeans, centroids):
        """Build the normalized codeword histogram of one image.

        Args:
            img: Image to describe.
            kmeans: Fitted estimator whose ``predict`` assigns each
                descriptor to a cluster index.
            centroids: Unused; kept for interface compatibility.

        Returns:
            An array of shape ``(1, num_clusters)`` holding the normalized
            histogram, or ``None`` when no descriptors could be computed for
            the image.
        """
        # Detect the key points and describe them.
        keypoints = StarFeatureDetector().detect(img)
        keypoints, feature_vectors = compute_sift_features(img, keypoints)

        # Without descriptors there is nothing to quantize; callers such as
        # get_feature_map treat None as "skip this image".
        if feature_vectors is None or len(feature_vectors) == 0:
            return None

        # Assign every descriptor to its nearest codeword.
        labels = kmeans.predict(feature_vectors)

        # Count the occurrences of each codeword. np.add.at accumulates
        # repeated indices and still raises IndexError if a label does not
        # fit into num_clusters bins.
        feature_vector = np.zeros(self.num_clusters)
        np.add.at(feature_vector, labels, 1)

        feature_vector_img = np.reshape(feature_vector,
                                        (1, feature_vector.shape[0]))
        return self.normalize(feature_vector_img)
输入图像提取特征然后映射到某一类
def compute_sift_features(img, keypoints):
    """Compute SIFT descriptors for ``keypoints`` on ``img``.

    Args:
        img: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).
        keypoints: Key points at which the descriptors are computed.

    Returns:
        The ``(keypoints, descriptors)`` pair returned by the SIFT
        extractor's ``compute`` method.

    Raises:
        TypeError: If ``img`` is ``None``.
    """
    if img is None:
        raise TypeError('Invalid input image')

    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Newer OpenCV releases expose SIFT in the main module; older contrib
    # builds only provide it under cv2.xfeatures2d. Support both.
    sift_create = getattr(cv2, 'SIFT_create', None)
    if sift_create is None:
        sift_create = cv2.xfeatures2d.SIFT_create

    keypoints, descriptors = sift_create().compute(img_gray, keypoints)
    return keypoints, descriptors
定义一个函数,为输入的每幅图像构建特征向量,得到特征映射:
def get_feature_map(input_map, kmeans, centroids, scaling_size):
    """Build the bag-of-words feature vector of every image in ``input_map``.

    Args:
        input_map: Iterable of dicts with the keys ``'object_class'`` and
            ``'image_path'``.
        kmeans: Fitted clustering model forwarded to
            ``BagOfWords.construct_feature``.
        centroids: Cluster centres forwarded to
            ``BagOfWords.construct_feature``.
        scaling_size: Target size forwarded to ``resize_image``.

    Returns:
        A list of dicts with the keys ``'object_class'`` and
        ``'feature_vector'``. Images that cannot be read, or for which no
        feature vector is produced, are left out.
    """
    feature_map = []
    for item in input_map:
        print("Extracting features for", item['image_path'])

        # cv2.imread signals failure by returning None; skip such files
        # instead of crashing inside resize_image.
        img = cv2.imread(item['image_path'])
        if img is None:
            print("Skipping unreadable image", item['image_path'])
            continue
        img = resize_image(img, scaling_size)

        feature_vector = BagOfWords().construct_feature(img, kmeans, centroids)
        if feature_vector is not None:
            feature_map.append({'object_class': item['object_class'],
                                'feature_vector': feature_vector})
    return feature_map
resize_image
def resize_image(input_img, new_size):
    """Resize an image so that its shorter side becomes ``new_size``.

    The aspect ratio is kept: both dimensions are multiplied by
    ``new_size / min(height, width)`` and truncated to integers.

    Args:
        input_img: Image array whose first two ``shape`` entries are
            ``(height, width)``.
        new_size: Target length of the shorter side.

    Returns:
        The result of ``cv2.resize`` for the computed ``(width, height)``.

    Raises:
        TypeError: If ``input_img`` is ``None`` (e.g. a failed
            ``cv2.imread``).
    """
    if input_img is None:
        raise TypeError('Invalid input image')

    h, w = input_img.shape[:2]
    # The shorter side determines the scaling factor.
    scaling_factor = new_size / float(min(h, w))
    # cv2.resize expects the target size as (width, height).
    new_shape = (int(w * scaling_factor), int(h * scaling_factor))
    return cv2.resize(input_img, new_shape)
Star检测器
class StarFeatureDetector(object):
    """Thin wrapper around the OpenCV Star key-point detector."""

    def __init__(self):
        """Create the underlying detector and keep it on the instance."""
        star = cv2.xfeatures2d.StarDetector_create()
        self.detector = star

    def detect(self, img):
        """Run the wrapped detector on ``img`` and return what it reports."""
        found = self.detector.detect(img)
        return found
主文件import
# -*- coding:utf8 -*-
import os
import sys
import argparse
# import cPickle as pickle
import pickle as pickle
import json
import cv2
import numpy as np
from sklearn.cluster import KMeans
- 在pycharm里编辑输入信息 方便调试
if __name__ == '__main__':
    # Hard-coded inputs so the script can be run and debugged from an IDE.
    data_folder = 'training_images/'
    scaling_size = 200
    codebook_file = 'codebook/9_8.pkl'
    feature_map_file = 'feature_map/9_8.pkl'

    # Create the output folders up front: a missing folder would otherwise
    # only surface in open() after the expensive computation has finished.
    for output_file in (codebook_file, feature_map_file):
        output_dir = os.path.dirname(output_file) if output_file else ''
        if output_dir and not os.path.isdir(output_dir):
            os.makedirs(output_dir)

    # Load the training data
    training_data = load_training_data(data_folder)

    # Build the visual codebook
    print("====== Building visual codebook ======")
    kmeans, centroids = FeatureBuilder().get_codewords(training_data, scaling_size)
    if codebook_file:
        # Write-only access is sufficient for dumping the pickle.
        with open(codebook_file, 'wb') as f:
            pickle.dump((kmeans, centroids), f)

    # Extract features from input images
    print("\n====== Building the feature map ======")
    feature_map = get_feature_map(training_data, kmeans, centroids, scaling_size)
    if feature_map_file:
        with open(feature_map_file, 'wb') as f:
            pickle.dump(feature_map, f)
- 命令行方式运行文件
# Command-line interface definition
def build_arg_parser():
    """Create the argument parser of the feature-extraction script.

    Returns:
        An ``argparse.ArgumentParser`` with the required options
        ``--data-folder``, ``--codebook-file`` and ``--feature-map-file``
        and the optional integer ``--scaling-size`` (default 200).
    """
    parser = argparse.ArgumentParser(
        description='Extract features from a given set of images')
    parser.add_argument(
        "--data-folder", dest="data_folder", required=True,
        help="Folder containing the training images organized in subfolders")
    parser.add_argument(
        "--codebook-file", dest='codebook_file', required=True,
        help="Output file where the codebook will be stored")
    parser.add_argument(
        "--feature-map-file", dest='feature_map_file', required=True,
        help="Output file where the feature map will be stored")
    # The help text describes what resize_image actually does: the scaling
    # factor is derived from the shorter side of the image.
    parser.add_argument(
        "--scaling-size", dest="scaling_size", type=int, default=200,
        help="Size that the shorter dimension of each image is scaled to.")
    return parser
if __name__ == '__main__':
    # Command-line entry point: all inputs come from the parsed arguments.
    args = build_arg_parser().parse_args()
    data_folder = args.data_folder
    scaling_size = args.scaling_size

    # Create the output folders up front: a missing folder would otherwise
    # only surface in open() after the expensive computation has finished.
    for output_file in (args.codebook_file, args.feature_map_file):
        output_dir = os.path.dirname(output_file) if output_file else ''
        if output_dir and not os.path.isdir(output_dir):
            os.makedirs(output_dir)

    # Load the training data
    training_data = load_training_data(data_folder)

    # Build the visual codebook
    print("====== Building visual codebook ======")
    kmeans, centroids = FeatureBuilder().get_codewords(training_data, scaling_size)
    if args.codebook_file:
        # Write-only access is sufficient for dumping the pickle.
        with open(args.codebook_file, 'wb') as f:
            pickle.dump((kmeans, centroids), f)

    # Extract features from input images
    print("\n====== Building the feature map ======")
    feature_map = get_feature_map(training_data, kmeans, centroids, scaling_size)
    if args.feature_map_file:
        with open(args.feature_map_file, 'wb') as f:
            pickle.dump(feature_map, f)