使用(SIFT特征KMeans聚类关键点训练SVM)实现自然图像中的logo商标识别和定位

(本博客只记录方法,因为本人觉得这是机器学习特征工程中一种比较不错的做法。)

上一篇博客中的方法:使用Py-OpenCV(SIFT关键点)实现自然图像中的logo商标识别和定位

当然也能提前预知该方法的缺点:对于新的logo需要重新训练模型,而且所需的数据集远大于上面的方法。

 

实现方法:

bb = [v for v in image_to_descriptors.values()]

  1. 按图片类别抽取训练集中所有图片的SIFT特征(注意:下文代码实际使用的是OpenCV的SURF特征,流程相同);
  2. 将每一类图片的SIFT特征聚类为K类,构成该类的visual vocabulary(其size为K);
  3. 对于训练集中的每一张图片,统计vocabulary中K个word的“词频”,得到相应的直方图;
  4. 将直方图作为样本向量即可构建SVM的训练数据和测试数据;

实现代码

(来自网络)

logoFind.py:

from __future__ import division

import glob
import os
import os.path
import pdb
import pickle as pickle
import random

import cv2
import numpy
import numpy as np
import scipy.cluster.vq as vq
from scipy import spatial
from sklearn import svm
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from remove_bad_matches import remove_bad_matches

def get_surf_descriptors(image_name):
    """Detect SURF keypoints in an image file and compute their descriptors.

    Args:
        image_name: Path of the image file, loaded with ``cv2.imread``.

    Returns:
        The ``(keypoints, descriptors)`` pair returned by
        ``detectAndCompute`` on the grayscale version of the image.
        NOTE(review): descriptors are presumably ``None`` when no keypoint
        is found -- confirm against the OpenCV version in use.

    Raises:
        IOError: If the image cannot be loaded from ``image_name``.
    """
    img = cv2.imread(image_name)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the offending path instead of letting cvtColor
        # raise an opaque assertion error.
        raise IOError("Could not read image: %s" % image_name)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # The single positional argument of SURF_create is the Hessian threshold.
    surf = cv2.xfeatures2d.SURF_create(400)
    kp, desc = surf.detectAndCompute(gray, None)
    return kp, desc

#aa = get_surf_descriptors('/home/byz/dataset/Logo/FlickrLogos-v2/classes/jpg/adidas/144503924.jpg')

def build_dataset(list_of_image_names):
    """Build a lookup from image path to that image's SURF descriptors.

    Args:
        list_of_image_names: Iterable of image file paths.

    Returns:
        A dict ``{image_name: descriptors}`` where ``descriptors`` is the
        second element returned by ``get_surf_descriptors`` for that path.
    """
    return {
        path: get_surf_descriptors(path)[1]
        for path in list_of_image_names
    }

def create_labels_matrix(image_names):
    """Derive the class label of every image from its parent directory name.

    The label is the name of the directory that directly contains the image
    (e.g. ``.../jpg/starbucks/123.jpg`` -> ``"starbucks"``), so the function
    works regardless of how deep the dataset sits in the file system.

    Args:
        image_names: Iterable of image file paths.

    Returns:
        A list of label strings, each ``"no-logo"`` or ``"starbucks"``, in
        the order of ``image_names``. Paths whose parent directory is neither
        of the two are reported on stdout and skipped, so the result can be
        shorter than the input.
    """
    known_classes = ("no-logo", "starbucks")
    labels_matrix = []
    for path in image_names:
        classname = os.path.basename(os.path.dirname(path))
        if classname in known_classes:
            labels_matrix.append(classname)
        else:
            print("无此类:" + classname)
    return labels_matrix

def create_histogram(labels, n_clusters=None):
    """Turn per-descriptor cluster ids into a normalized visual-word histogram.

    Args:
        labels: Sequence of integer cluster ids in ``[0, n_clusters)``.
        n_clusters: Vocabulary size, i.e. the number of histogram bins.
            Defaults to the module-level ``num_clusters``.

    Returns:
        A 1-D array of length ``n_clusters``; the bins have unit width, so
        the entries are the relative frequencies of each cluster id.
    """
    if n_clusters is None:
        n_clusters = num_clusters
    # ``density=True`` replaces the ``normed`` keyword, which NumPy has
    # removed; for these equal-width bins the result is the same.
    hist, _edges = np.histogram(
        labels, bins=np.arange(n_clusters + 1), density=True
    )
    return hist


# Class-id mapping as written by the original author.
# NOTE(review): LabelEncoder (used below) numbers classes in sorted order,
# which puts 'no-logo' before 'starbucks' -- the opposite of this mapping.
# The dict is not referenced anywhere in this script; confirm before use.
label_dict = {0: 'starbucks', 1: 'no-logo'}
folder_name = "/home/byz/dataset/Logo/FlickrLogos-v2/classes/jpg/starbucks/"
other_folder_name = "/home/byz/dataset/Logo/FlickrLogos-v2/classes/jpg/no-logo/"
image_names = glob.glob(os.path.join(folder_name, "*.jpg")) + glob.glob(os.path.join(other_folder_name, "*.jpg"))
num_clusters = 1000  # size K of the visual vocabulary
all_image_histograms = {}

# ===== 1. Extract the SURF descriptors of every image: {image_name: descriptors}
image_to_descriptors = build_dataset(image_names)
# An image without descriptors maps to None; such entries can neither be
# stacked nor clustered, so they are dropped before going any further.
image_to_descriptors = {
    name: desc for name, desc in image_to_descriptors.items() if desc is not None
}
# Stack the descriptors of all images into one matrix. np.vstack needs a real
# sequence; a dict view is rejected on Python 3 with current NumPy.
# (The author reported 19777869 descriptors from 6070 images.)
all_descriptors = np.vstack(list(image_to_descriptors.values()))

# ===== 2. Cluster the descriptors into K visual words with mini-batch k-means
kmeans = MiniBatchKMeans(n_clusters=num_clusters)
kmeans.fit(all_descriptors)

# ===== 3. Build the visual-word frequency histogram of every image
for index, image_name in enumerate(image_to_descriptors):
    if index % 1000 == 0:
        print(index)
    # One cluster id per descriptor of this image.
    labels = kmeans.predict(image_to_descriptors[image_name])
    all_image_histograms[image_name] = create_histogram(labels)

# ===== 4. Encode the labels and split into training and test data
class_names = create_labels_matrix(all_image_histograms.keys())
enc = LabelEncoder()
label_encoder = enc.fit(class_names)
y = label_encoder.transform(class_names)
x_train, x_test, y_train, y_test = train_test_split(
    list(all_image_histograms.values()), y, test_size=.3, random_state=42
)

from sklearn.svm import LinearSVC
# "squared_hinge" is the current name of the loss formerly spelled "l2".
clf = LinearSVC(C=1000, loss="squared_hinge")
clf = clf.fit(x_train, y_train)
a = clf.predict(x_test)
print("Accuracy is: %f" % np.mean(a == y_test))
# Accuracy reported by the original author: 0.991214



remove_bad_matches.py:
from collections import Counter
import numpy as np
# Default number of times a row may occur before it is treated as a bad match.
max_count = 10


def remove_bad_matches(list_of_matches1, list_of_matches2, limit=None):
    """Drop rows of ``list_of_matches1`` that repeat too often, in both inputs.

    Rows are compared through their ``str()`` form. Every row of
    ``list_of_matches1`` whose value occurs more than ``limit`` times is
    removed, together with the row at the same index in ``list_of_matches2``.

    Args:
        list_of_matches1: Array-like whose rows are checked for repetition.
        list_of_matches2: Array-like filtered with the same row indexes.
        limit: Maximum allowed number of occurrences of one row. Defaults to
            the module-level ``max_count``.

    Returns:
        A tuple ``(filtered1, filtered2)`` of new arrays; the inputs are not
        modified.
    """
    if limit is None:
        limit = max_count
    # Compute each row's key once and reuse it for counting and filtering.
    row_keys = [str(row) for row in list_of_matches1]
    occurrences = Counter(row_keys)
    indexes_to_delete = [
        index for index, key in enumerate(row_keys) if occurrences[key] > limit
    ]
    # np.delete builds a new array, so call it once per input with all indexes.
    return (
        np.delete(list_of_matches1, indexes_to_delete, axis=0),
        np.delete(list_of_matches2, indexes_to_delete, axis=0),
    )

# Small demo: capture the filtered arrays, since remove_bad_matches returns
# new arrays rather than modifying its arguments. Here [1, 2, 3] occurs only
# twice, which is within max_count, so both arrays print unchanged.
a = np.array([[1,2,3],[3,4,5],[1,2,3]])
b = np.array([[1,1,1],[2,2,2],[3,3,3]])
a, b = remove_bad_matches(a, b)
print(a)
print(b)

 

你可能感兴趣的:(计算机视觉,机器学习,openCV)