图像搜索工具箱Caltech Large Scale Image Search Toolbox是Mohamed Alaa El Dien Aly在加州理工学院(Caltech)读博期间所作,目前他已加入沙特阿拉伯KAUST的Visual Computing Center,详情可查看其个人主页。该工具箱的主要功能是针对大规模图像集进行基于BOW(Bag of Words,词袋)的图像检索。为加快速度,在图像检索上采用了LSH(Locality Sensitive Hashing,位置敏感哈希)、Inverted File Index(倒排文件索引)和Min-Hash等方法,在由局部特征生成词典方面采用了AKM(Approximate K-Means)和HKM(Hierarchical K-Means)方法。虽然该工具箱采用的BOW、词典生成在图像识别中目前已不是主流,但LSH、Inverted File Index、Min-Hash、AKM、HKM等方法在图像检索中仍有较大的参考和使用价值。尤其是快速检索方法LSH,该方法自2005年欧氏空间方案E2LSH诞生至今虽然已超过十年,但在快速图像检索上仍然有很大优势。而在文本处理上,BOW是文档向量化的重要方法之一。LSH之所以仍有优势,应该是因为这期间没有更有效的检索方法出现。今年上半年,国内著名研究机构提出了将深度学习与LSH结合的检索方法,但这只是两种方法的融合而已。
下面简单介绍该工具箱的示例程序Demo.m,这里是部分代码展示,完整版请参看工具箱源码。
function demo()
% DEMO Run the two demonstration routines of the image search toolbox.
%
% Adds the toolbox folder (looked up under the current directory) to the
% MATLAB path, then runs the bag-of-words demo followed by the
% full-representation (nearest-neighbour index) demo.
%
% NOTE(review): this function header is not present in the excerpt. Without
% it the statements below are script code placed in front of function
% definitions, and the last `end` of the file has nothing to close. With it,
% bag_of_words and full_representation are nested functions of demo and
% that last `end` terminates demo. Confirm against the demo.m shipped with
% the toolbox.
%
% Author: Mohamed Aly
% Date: October 6, 2010

% Put the toolbox folder on the path (presumably where the ccv* functions
% used below live).
root = pwd;
addpath(fullfile(root, 'caltech-image-search-1.0'));

% Bag-of-words based search.
bag_of_words();

% Index structures built directly on the feature vectors.
full_representation();
%--------------------------------------------------------------------------
% Bag of Words
%--------------------------------------------------------------------------
function bag_of_words()
% BAG_OF_WORDS Bag-of-words retrieval demo on randomly generated features.
%
% Steps performed:
%   1. generate random uint8 feature vectors plus a per-feature image label;
%   2. build a visual dictionary (AKM or HKM) with ccvBowGetDict;
%   3. compute the words of every image with ccvBowGetWords;
%   4. put the images into an inverted file and query it with the words of
%      the first two images;
%   5. put the images into a Min-Hash LSH index and run the same query.
%
% The two search calls are intentionally left without a terminating
% semicolon so that ids and dists are printed to the command window.
% Takes no arguments and returns nothing.

  % Set the random seed; the previous state is put back at the very end of
  % the function so that every step of the demo runs under the fixed seed.
  old_seed = ccvRandSeed(123, 'set');

  % Generate random data (real image features could be loaded here instead).
  fprintf('Creating features\n');
  num_images = 100;
  features_per_image = 1000;
  dim = 128;
  num_features = num_images * features_per_image;
  % dim-by-num_features matrix, one feature vector per column.
  features = uint8(ceil(rand(dim, num_features) * 255));
  % Row vector of image ids: 1 repeated features_per_image times, then 2, ...
  labels = reshape(repmat(uint32(1:num_images), features_per_image, 1), [], 1)';

  % Dictionary construction method: 'akmeans' or 'hkmeans'.
  dict_type = 'akmeans';
  fprintf('Building the dictionary: %s\n', dict_type);

  % Method-specific parameters.
  switch dict_type
    % AKM (approximate k-means) dictionary
    case 'akmeans'
      num_words = 100;
      num_iterations = 5;
      num_trees = 2;
      dict_params = {num_iterations, 'kdt', num_trees};

    % HKM (hierarchical k-means) dictionary
    case 'hkmeans'
      num_words = 100;
      num_iterations = 5;
      num_levels = 2;
      num_branches = 10;
      dict_params = {num_iterations, num_levels, num_branches};
  end % switch

  % Build the dictionary.
  dict_words = ccvBowGetDict(features, [], [], num_words, 'flat', dict_type, [], dict_params);

  % Compute the words: words is a 1-by-num_images cell array whose i-th
  % entry is what ccvBowGetWords returns for the features of image i
  % (presumably the visual-word ids of those features -- confirm in the
  % toolbox documentation).
  fprintf('Computing the words\n');
  dict = ccvBowGetWordsInit(dict_words, 'flat', dict_type, [], dict_params);
  words = cell(1, num_images);
  for i = 1:num_images
    words{i} = ccvBowGetWords(dict_words, features(:, labels == i), [], dict);
  end
  ccvBowGetWordsClean(dict);

  % Build an inverted file over the per-image words.
  fprintf('Creating and searching an inverted file\n');
  if_weight = 'none';
  if_norm = 'l1';
  if_dist = 'l1';
  inv_file = ccvInvFileInsert([], words, num_words);
  ccvInvFileCompStats(inv_file, if_weight, if_norm);

  % Query the inverted file with the first two images (output is displayed).
  % NOTE(review): the trailing 5 is presumably the number of results to
  % return per query -- confirm against ccvInvFileSearch.
  [ids, dists] = ccvInvFileSearch(inv_file, words(1:2), if_weight, if_norm, ...
    if_dist, 5)

  % Release the inverted file.
  ccvInvFileClean(inv_file);

  % Min-Hash LSH index.
  fprintf('Creating and searching a Min-Hash LSH index\n');
  ntables = 3;
  nfuncs = 2;
  dist = 'jac';

  % Create the index and insert the per-image words.
  lsh = ccvLshCreate(ntables, nfuncs, 'min-hash', dist, 0, 0, 0, 100);
  ccvLshInsert(lsh, words, 0);

  % Query the Min-Hash LSH index with the first two images (output is
  % displayed).
  [ids, dists] = ccvLshKnn(lsh, words, words(1:2), 5, dist)

  % Release the LSH index.
  ccvLshClean(lsh);

  % Restore the seed only after the last index has been built and queried.
  % This matches full_representation, which restores it after all of its
  % index work; previously the restore happened before the Min-Hash section.
  ccvRandSeed(old_seed, 'restore');
end % bag_of_words function
%--------------------------------------------------------------------------
% Full Representation
%--------------------------------------------------------------------------
function full_representation()
% FULL_REPRESENTATION Nearest-neighbour search demo on the feature vectors.
%
% Generates random uint8 features for num_images images, then, for each of
% four index types (Kd-trees, hierarchical k-means, LSH-L2, LSH-Simplex):
%   * builds the index over all features,
%   * looks up 2 neighbours for every feature of the first image,
%   * counts which image label occurs most often among the first-row
%     neighbour ids and displays that count and label,
%   * releases the index.
% Takes no arguments and returns nothing.

  % Fix the random seed; the previous state is restored on the last line.
  old_seed = ccvRandSeed(123, 'set');

  fprintf('Creating features\n');
  num_images = 100;
  features_per_image = 1000;
  dim = 128;
  num_features = num_images * features_per_image;
  % dim-by-num_features matrix, one feature vector per column.
  features = uint8(ceil(rand(dim, num_features) * 255));
  % Row vector of image ids: 1 repeated features_per_image times, then 2, ...
  labels = reshape(repmat(uint32(1:num_images), features_per_image, 1), [], 1)';

  % Query set: all features belonging to the first image. It does not depend
  % on the index type, so it is extracted once.
  query = features(:, labels == 1);

  % Nearest-neighbour search methods to demonstrate.
  nn_types = {'kdt', 'hkm', 'lsh-l2', 'lsh-simplex'};

  for k = 1:numel(nn_types)
    nn_type = nn_types{k};

    % Build the index and remember which functions search and release it.
    fprintf('\nCreating index %d: %s\n', k, nn_type);
    switch nn_type
      % Kd-tree index
      case 'kdt'
        ntrees = 4;
        index = ccvKdtCreate(features, ntrees);
        knn_fn = @ccvKdtKnn;
        clean_fn = @ccvKdtClean;

      % Hierarchical k-means index
      case 'hkm'
        nlevels = 4;
        nbranches = 10;
        niterations = 20;
        index = ccvHkmCreate(features, niterations, nlevels, nbranches);
        knn_fn = @ccvHkmKnn;
        clean_fn = @ccvHkmClean;

      % LSH-L2 index
      case 'lsh-l2'
        ntables = 4;
        nfuncs = 20;
        index = ccvLshCreate(ntables, nfuncs, 'l2', 'l2', 1, dim, .1, 1000);
        ccvLshInsert(index, features);
        knn_fn = @ccvLshKnn;
        clean_fn = @ccvLshClean;

      % LSH-Simplex index
      case 'lsh-simplex'
        ntables = 4;
        nfuncs = 2;
        index = ccvLshCreate(ntables, nfuncs, 'sph-sim', 'l2', 1, dim, .1, 1000);
        ccvLshInsert(index, features);
        knn_fn = @ccvLshKnn;
        clean_fn = @ccvLshClean;
    end

    % Nearest-neighbour search for the features of the first image.
    fprintf('Searching for first image\n');
    % Two outputs are requested as in the original call, although only the
    % ids are used afterwards.
    [nnids, nndists] = knn_fn(index, features, query, 2);

    % Tally the image labels of the first-row neighbour ids and show the
    % highest count together with the label that received it.
    neighbour_labels = labels(nnids(1, :));
    votes = histc(neighbour_labels, 1:num_images);
    [sorted_votes, ranked_labels] = sort(votes, 'descend');
    sorted_votes(1), ranked_labels(1)

    % Release the index.
    clean_fn(index);
  end

  ccvRandSeed(old_seed, 'restore');
end
end