The Caltech Large Scale Image Search Toolbox was written by Mohamed Alaa El Dien Aly during his PhD at Caltech; he has since joined the Visual Computing Center at KAUST in Saudi Arabia (see his personal homepage for details). The toolbox's main purpose is BoW (Bag of Words) based image retrieval over large-scale image collections. To speed up retrieval it implements LSH (Locality Sensitive Hashing), inverted file indexing, and Min-Hash; for building visual dictionaries from local features it provides AKM (Approximate K-Means) and HKM (Hierarchical K-Means). Although BoW and visual dictionaries are no longer the mainstream approach in image recognition, LSH, inverted files, Min-Hash, AKM, and HKM still have considerable reference and practical value for image retrieval. LSH in particular remains a strong option for fast image search: more than a decade has passed since the Euclidean-space scheme E2LSH appeared around 2005, and few fundamentally more efficient retrieval schemes have emerged since. In text processing, BoW likewise remains one of the standard ways to vectorize documents. In the first half of this year, a well-known Chinese research institution proposed a retrieval method that combines deep learning with LSH, but it is essentially just a fusion of the two techniques.
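Before walking through the demo, here is a minimal, self-contained MATLAB sketch of the Min-Hash idea the toolbox uses for set-based (Jaccard) search; it is purely illustrative and uses none of the toolbox's functions. Each hash is a random permutation of the vocabulary, and two word sets receive the same minimum permuted id with probability equal to their Jaccard similarity, so averaging collisions over many permutations gives a cheap estimate of set overlap; LSH then groups several such hashes into tables so that likely matches can be found by bucket lookup rather than exhaustive comparison.

% Minimal Min-Hash illustration (not toolbox code).
num_words = 100;                                 % vocabulary size
A = randperm(num_words, 30);                     % visual-word set of image A
B = unique([A(1:15), randperm(num_words, 15)]);  % image B shares about half of A
num_hashes = 200;                                % number of random permutations
matches = 0;
for k = 1:num_hashes
  perm = randperm(num_words);                    % one permutation = one min-hash function
  if min(perm(A)) == min(perm(B))                % the two sets collide on this hash
    matches = matches + 1;
  end
end
est_jaccard  = matches / num_hashes;             % Min-Hash estimate of the Jaccard similarity
true_jaccard = numel(intersect(A, B)) / numel(union(A, B));
fprintf('estimated %.2f vs true %.2f\n', est_jaccard, true_jaccard);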
Below is a brief walkthrough of the toolbox's example script Demo.m. Only part of the code is shown here; see the toolbox source for the complete version.

% Author: Mohamed Aly 
% Date: October 6, 2010 
root = pwd;
addpath(fullfile(root,'caltech-image-search-1.0'));
% Bag-of-Words based search demo
bag_of_words(); 
% Demo of indexing the raw feature vectors directly ("full representation")
full_representation(); 
%--------------------------------------------------------------------------
% Bag of Words 
%--------------------------------------------------------------------------
function bag_of_words() 
% Save the previous random seed and set a fixed one for reproducibility
old_seed = ccvRandSeed(123, 'set'); 
% Generate random data; real image features could be read in and processed instead
fprintf('Creating features\n');
num_images = 100;
features_per_image = 1000;
dim = 128;
num_features = num_images * features_per_image; 
features = uint8(ceil(rand(dim, num_features) * 255));
labels = reshape(repmat(uint32(1:num_images), features_per_image, 1), [], 1)'; 
% Choose the dictionary (visual vocabulary) construction method
dict_type = 'akmeans'; 
fprintf('Building the dictionary: %s\n', dict_type);
% Set the parameters of the chosen dictionary method
switch dict_type
  % AKM (Approximate K-Means) dictionary
  case 'akmeans'
    num_words = 100;
    num_iterations = 5;
    num_trees = 2;
    dict_params =  {num_iterations, 'kdt', num_trees}; 
  % HKM (Hierarchical K-Means) dictionary
  case 'hkmeans'
    num_words = 100;
    num_iterations = 5;
    num_levels = 2;
    num_branches = 10;
    dict_params = {num_iterations, num_levels, num_branches};
end; % switch 
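% Background on the two methods (not part of the original demo): AKM replaces the
% exact nearest-center assignment inside k-means with an approximate search over
% randomized kd-trees ('kdt', num_trees of them), which keeps clustering large
% feature sets tractable. HKM instead builds a tree of small k-means problems,
% num_levels deep with num_branches children per node, so the leaf vocabulary has
% num_branches^num_levels words (10^2 = 100 here, consistent with num_words).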
% Build the dictionary
dict_words = ccvBowGetDict(features, [], [], num_words, 'flat', dict_type, [], dict_params);
% Quantize the features into visual words (words is a cell array with one cell per image; each cell holds the word ids assigned to that image's features)
fprintf('Computing the words\n');
dict = ccvBowGetWordsInit(dict_words, 'flat', dict_type, [], dict_params);
words = cell(1, num_images);
for i=1:num_images
  words{i} = ccvBowGetWords(dict_words, features(:,labels==i), [], dict);
end;
ccvBowGetWordsClean(dict); 
% Build an inverted file index over the word lists
fprintf('Creating and searching an inverted file\n');
if_weight = 'none';  % term weighting applied to the word counts
if_norm = 'l1';      % normalization of the per-image word histograms
if_dist = 'l1';      % distance used to rank images against a query
inv_file = ccvInvFileInsert([], words, num_words);
% Precompute the per-document statistics (weights and norms) used at query time
ccvInvFileCompStats(inv_file, if_weight, if_norm); 
% Query the inverted file with the word lists of the first two images (top 5 results; no trailing semicolon, so ids and dists are printed)
[ids dists] = ccvInvFileSearch(inv_file, words(1:2), if_weight, if_norm, ...
  if_dist, 5) 
ccvInvFileClean(inv_file); 
ccvRandSeed(old_seed, 'restore'); 
% Min-Hash LSH index (Jaccard distance between word sets)
fprintf('Creating and searching a Min-Hash LSH index\n');
ntables = 3;   % number of hash tables
nfuncs = 2;    % hash functions per table
dist = 'jac';  % Jaccard distance between sets of word ids
% Create the index and insert the word lists
lsh = ccvLshCreate(ntables, nfuncs, 'min-hash', dist, 0, 0, 0, 100);
ccvLshInsert(lsh, words, 0); 
% Search the Min-Hash index with the word lists of the first two images (5 nearest neighbors)
[ids dists] = ccvLshKnn(lsh, words, words(1:2), 5, dist) 
ccvLshClean(lsh); 
end % bag_of_words function 
%--------------------------------------------------------------------------
% Full Representation
%--------------------------------------------------------------------------
function full_representation()
old_seed = ccvRandSeed(123, 'set'); 
fprintf('Creating features\n');
num_images = 100;
features_per_image = 1000;
dim = 128;
num_features = num_images * features_per_image; 
features = uint8(ceil(rand(dim, num_features) * 255));
labels = reshape(repmat(uint32(1:num_images), features_per_image, 1), [], 1)'; 
% Nearest-neighbor (NN) search methods to demonstrate
nn_types = {'kdt', 'hkm', 'lsh-l2', 'lsh-simplex'}; 
for nni=1:length(nn_types);
  % Current search type
  type = nn_types{nni};  
  % Build the index
  fprintf('\nCreating index %d: %s\n', nni, type);
  switch type
    % Kd-tree index
    case 'kdt'
      ntrees = 4;
      index = ccvKdtCreate(features, ntrees); 
    % Hierarchical K-Means (HKM) index
    case 'hkm'
      nlevels = 4;
      nbranches = 10;
      niterations = 20;
      index = ccvHkmCreate(features, niterations, nlevels, nbranches);      
    % LSH index with L2 (Euclidean) hash functions
    case 'lsh-l2'
      ntables = 4;
      nfuncs = 20;
      index = ccvLshCreate(ntables, nfuncs, 'l2', 'l2', 1, dim, .1, 1000);
      ccvLshInsert(index, features);      
    % LSH index with simplex-based hash functions ('sph-sim')
    case 'lsh-simplex'
      ntables = 4;
      nfuncs = 2;
      index = ccvLshCreate(ntables, nfuncs, 'sph-sim', 'l2', 1, dim, .1, 1000);
      ccvLshInsert(index, features);
  end; 
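  % Background on the four index types (not part of the original demo):
  % 'kdt' searches a forest of randomized kd-trees with backtracking; 'hkm' descends
  % a hierarchical k-means tree (nbranches children per node, nlevels levels);
  % 'lsh-l2' follows the classic Euclidean LSH recipe, where each hash has the form
  % h(x) = floor((a'*x + b) / w) with Gaussian a, so points close in L2 tend to land
  % in the same bucket; 'sph-sim' is the toolbox's own simplex-based LSH variant
  % (see the author's papers and documentation for details).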
  % Find the 2 nearest neighbors of every feature of the first image
  fprintf('Searching for first image\n');
  switch type
    case 'kdt'
      [nnids nndists] = ccvKdtKnn(index, features, features(:,labels==1), 2);
    case 'hkm'
      [nnids nndists] = ccvHkmKnn(index, features, features(:,labels==1), 2);
    case {'lsh-l2', 'lsh-simplex'}
      [nnids nndists] = ccvLshKnn(index, features, features(:,labels==1), 2);
  end;   
  % Vote over the labels of the top-1 neighbors; the most frequent image should be image 1
  nnlabels = labels(nnids(1,:));
  counts = histc(nnlabels, 1:num_images);
  [counts cids] = sort(counts, 'descend');
  counts(1), cids(1)  % top vote count and the winning image id (printed)
  % Release the index
  switch type
    case 'kdt'
      ccvKdtClean(index);
    case 'hkm'
      ccvHkmClean(index);
    case {'lsh-l2', 'lsh-simplex'}
      ccvLshClean(index);
  end; 
end;  
ccvRandSeed(old_seed, 'restore'); 
end  
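For reference, the inverted-file scoring in bag_of_words above corresponds conceptually to comparing L1-normalized word histograms with an L1 distance (that is how I read if_weight = 'none', if_norm = 'l1', if_dist = 'l1'); the inverted file reaches the same ranking by walking only the posting lists of the query's words instead of scanning every image. A brute-force MATLAB sketch of that baseline, independent of the toolbox:

% Brute-force BoW ranking (illustration only).
num_words = 100;
num_images = 3;
words = cell(1, num_images);                  % toy word lists, one per image
for i = 1:num_images
  words{i} = randi(num_words, 1, 50);
end
H = zeros(num_words, num_images);             % word histograms, one column per image
for i = 1:num_images
  h = histc(words{i}, 1:num_words);           % raw counts ('none' weighting)
  H(:, i) = h(:) / sum(h);                    % 'l1' normalization
end
d = sum(abs(bsxfun(@minus, H, H(:, 1))), 1);  % 'l1' distance of every image to image 1
[dists, ids] = sort(d, 'ascend');             % ranked results, image 1 itself first
fprintf('ranking w.r.t. image 1: %s\n', mat2str(ids));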