BoW (Bag of Words) can initially be understood as a kind of histogram statistic; it originated as a simple document representation method in natural language processing and information retrieval. Like a histogram, BoW records only frequency information and discards sequence/order information. The difference is that a histogram usually counts frequencies over numeric intervals (bins), whereas BoW first chooses a dictionary of words and then counts how often each dictionary word occurs.
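As a minimal sketch of the text-side idea (the dictionary and sentence below are invented for illustration), counting dictionary words while discarding word order looks like this:

from collections import Counter

dictionary = ["cat", "dog", "bike"]            # the chosen word dictionary
doc = "the dog chased the cat and the dog barked"
counts = Counter(doc.split())
bow = [counts[w] for w in dictionary]          # frequency of each dictionary word
print(bow)                                     # [1, 2, 0]; word order is lost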
The visual bag-of-words pipeline is roughly: first extract a set of features from the image collection, then cluster them into a number of clusters that serve as the dictionary (the cluster centers play the role of words), and finally represent each image by the histogram of word occurrences over that dictionary. The resulting vector can then be used for classification, retrieval, and similar tasks.
Take SIFT features as an example. Suppose the image set contains faces, bicycles, guitars, etc. We first extract SIFT features from every image, then run a clustering method such as KMeans to obtain the codebook (dictionary).
1. Reading the images
from glob import glob
import cv2

def gettestfiles():
    imlist = {}
    count = 0
    for each in glob(r'./images/test/' + "*"):       # one sub-directory per category
        word = each.split("/")[-1]                   # directory name = category label
        print("#### Reading image category", word, "####")
        imlist[word] = []
        for imagefile in glob(each + '/*'):
            im = cv2.imread(imagefile, 0)            # flag 0: load as grayscale
            imlist[word].append(im)
            count += 1
    return [imlist, count]
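A minimal usage sketch, assuming a directory layout of ./images/test/<category>/ with image files inside:

imlist, count = gettestfiles()
print("Loaded", count, "images across", len(imlist), "categories:", list(imlist.keys()))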
2. SIFT features
import numpy as np

def trainModel(self):
    """
    Contains the entire pipeline required for training the bag of
    visual words model. Makes extensive use of helper functions.
    """
    # read files and prepare the per-category image lists
    # self.images, self.trainImageCount = self.file_helper.getFiles()
    # extract SIFT features from each image
    label_count = 0
    for word in self.images.keys():
        self.name_dict[str(label_count)] = word      # map numeric label -> category name
        print("Computing features for", word)
        for im in self.images[word]:
            # cv2.imshow("im", im)
            # cv2.waitKey()
            self.train_labels = np.append(self.train_labels, label_count)
            kp, des = self.im_helper.features(im)    # keypoints and 128-d descriptors
            self.descriptor_list.append(des)
        label_count += 1
class ImageHelpers:
    def __init__(self):
        # in OpenCV >= 4.4, SIFT lives in the main module: cv2.SIFT_create()
        self.sift_object = cv2.xfeatures2d.SIFT_create()

    def gray(self, image):
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return gray

    def features(self, image):
        keypoints, descriptors = self.sift_object.detectAndCompute(image, None)
        return [keypoints, descriptors]
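Since the images were loaded with cv2.imread(imagefile, 0), they are already grayscale, so the gray() helper is only needed when working with color (BGR) input.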
At this point we have SIFT features for the whole image set; each descriptor (des) is 128-dimensional. PCA can optionally be applied here to reduce the dimensionality before clustering.
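A minimal sketch of that optional PCA step, assuming scikit-learn and that the per-image descriptors have already been collected into descriptor_list as above (the choice of 64 components is illustrative, not from the original code):

from sklearn.decomposition import PCA
import numpy as np

descriptors = np.vstack(descriptor_list)   # M x 128 stack of all SIFT descriptors
pca = PCA(n_components=64)                 # illustrative target dimensionality
reduced = pca.fit_transform(descriptors)   # M x 64; cluster these instead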
3. Clustering: KMeans
# perform clustering on the stacked descriptors
bov_descriptor_stack = self.bov_helper.formatND(self.descriptor_list)
self.bov_helper.cluster()
self.bov_helper.developVocabulary(n_images=self.trainImageCount, descriptor_list=self.descriptor_list)
def formatND(self, l):
    """
    Restructures the list of per-image descriptor arrays into a single
    vstack-ed array of shape M samples x N features for sklearn.
    """
    vStack = np.array(l[0])
    for remaining in l[1:]:                  # np.vstack(l) would do this in one call
        vStack = np.vstack((vStack, remaining))
    self.descriptor_vstack = vStack.copy()
    return vStack
def cluster(self):
    """
    Clusters the stacked descriptors using the KMeans algorithm.
    """
    self.kmeans_ret = self.kmeans_obj.fit_predict(self.descriptor_vstack)
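The kmeans_obj used here is never constructed in the excerpts above. A plausible initialization, assuming scikit-learn's KMeans and SVC (the class name BOVHelpers and the vocabulary size of 100 are assumptions, not from the original code):

from sklearn.cluster import KMeans
from sklearn.svm import SVC

class BOVHelpers:                        # name inferred from self.bov_helper above
    def __init__(self, n_clusters=100):  # vocabulary size; 100 is an arbitrary choice
        self.n_clusters = n_clusters
        self.kmeans_obj = KMeans(n_clusters=n_clusters)
        self.kmeans_ret = None           # cluster assignment per descriptor
        self.descriptor_vstack = None    # set by formatND()
        self.mega_histogram = None       # set by developVocabulary()
        self.scale = None                # fitted StandardScaler, set by standardize()
        self.clf = SVC()                 # fitted by train()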
def developVocabulary(self, n_images, descriptor_list, kmeans_ret=None):
    """
    Each cluster denotes a particular visual word, and every image can be
    represented as a combination of multiple visual words. The best method
    is to generate a sparse histogram containing the frequency of occurrence
    of each visual word. The vocabulary representation thus comprises one
    histogram per image, covering the descriptors of all images.
    """
    self.mega_histogram = np.array([np.zeros(self.n_clusters) for i in range(n_images)])
    old_count = 0
    for i in range(n_images):
        l = len(descriptor_list[i])           # number of descriptors in image i
        for j in range(l):
            # use the internally computed assignments unless external ones are given
            if kmeans_ret is None:
                idx = self.kmeans_ret[old_count + j]
            else:
                idx = kmeans_ret[old_count + j]
            self.mega_histogram[i][idx] += 1
        old_count += l
    print("Vocabulary Histogram Generated")
4. Classification: SVM
self.bov_helper.standardize()
self.bov_helper.train(self.train_labels)
from sklearn.preprocessing import StandardScaler

def standardize(self, std=None):
    """
    Standardizes the histograms to zero mean and unit variance per feature.
    Without standardization, dimensions with large variance would dominate
    and could bias the classifier.
    """
    if std is None:
        self.scale = StandardScaler().fit(self.mega_histogram)
        self.mega_histogram = self.scale.transform(self.mega_histogram)
    else:
        print("STD is not None; external scaler supplied.")
        self.mega_histogram = std.transform(self.mega_histogram)
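Note that the scaler fitted here (self.scale) must be kept and reused on test-time histograms; that is exactly what the std argument is for.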
def train(self, train_labels):
    """
    Fits the sklearn.svm.SVC classifier (SVM) on the standardized histograms.
    """
    print("Training SVM")
    print(self.clf)
    print("Train labels", train_labels)
    self.clf.fit(self.mega_histogram, train_labels)
    print("Training completed")
Reference: code