1. What is content-based image retrieval (CBIR)?
1. Steps for building a content-based image retrieval system
(1) Define your image descriptor: at this stage you decide which aspect of the image you want to describe. Are you interested in the color of the image? The shape of objects in the image? Or do you want to characterize texture?
(2) Feature extraction and indexing your dataset: now that you have defined your image descriptor, your job is to apply it to every image in your dataset, extract features from these images, and write the features to storage (e.g., a CSV file, an RDBMS, Redis, etc.) so they can later be compared for similarity. You also need to decide whether a specialized data structure will be used to speed up the search.
(3) Define your similarity metric: we now have a collection of (potentially very many) feature vectors. But how do you compare them for similarity? Popular choices include the Euclidean distance, the cosine distance, and the χ² distance, but the right choice depends heavily on (1) your dataset and (2) the types of features you extracted.
(4) Search: the final step is performing an actual search. A user submits a query image to the system (for example, from an upload form or via a mobile app), and your job is to (1) extract features from that query image and (2) apply your similarity function to compare the query features against the features already indexed. From there, you simply return the most relevant results according to your similarity function. The sketch after this list ties the four steps together.
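To make the four steps concrete, here is a minimal, self-contained sketch of the whole pipeline. The simple BGR color-histogram descriptor, the in-memory index, and the *.jpg file layout are illustrative assumptions, not the exact code used later in these notes:

import glob
import cv2
import numpy as np

def describe(imagePath, bins=(8, 8, 8)):
    # step 1: the image descriptor (here, a simple 3D BGR color histogram)
    image = cv2.imread(imagePath)
    hist = cv2.calcHist([image], [0, 1, 2], None, bins, [0, 256, 0, 256, 0, 256])
    return cv2.normalize(hist, hist).flatten()

def index(datasetDir):
    # step 2: apply the descriptor to every image and store the features
    return {p: describe(p) for p in glob.glob(datasetDir + "/*.jpg")}

def chi2(a, b, eps=1e-10):
    # step 3: the similarity metric (chi-squared distance)
    return 0.5 * np.sum(((a - b) ** 2) / (a + b + eps))

def search(queryPath, featureIndex, numResults=10):
    # step 4: compare the query features against the index and rank the results
    q = describe(queryPath)
    results = sorted((chi2(f, q), p) for (p, f) in featureIndex.items())
    return results[:numResults]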
2. How does CBIR differ from machine learning / image classification?
(1) Machine learning encompasses methods for making computers perform intelligent tasks such as prediction, classification, and recognition. It also covers algorithms that let computers perform these tasks without being explicitly programmed.
CBIR does borrow some machine learning techniques, namely dimensionality reduction and clustering, but a CBIR system performs no actual learning.
(2) The main difference is that CBIR does not directly try to understand and interpret the contents of an image. Instead, a CBIR system relies on:
- Quantifying an image by extracting a feature vector.
- An assumption about comparing feature vectors: images with similar feature vectors have similar visual contents.
Based on these two components, an image search engine can compare a query against a dataset of images and return the most relevant results, without ever actually "knowing" the contents of the images.
(3) In machine learning and image classification, learning and understanding the contents of an image requires some notion of a training set: a collection of labeled data used to teach the computer what each visual object in the dataset looks like.
(4) CBIR systems require no labeled data. They simply take a dataset of images, extract features from each image, and make the dataset visually searchable. In some ways, you can think of a CBIR system as a "dumb" image classifier that has no notion of labels to make itself smarter; it relies only on (1) the features extracted from the images and (2) a similarity function to give the user meaningful results.
2. Building a CBIR system
1. Directory structure and purpose
|--- pyimagesearch
from __future__ import print_function
from pyimagesearch.cbir.resultsmontage import ResultsMontage
from pyimagesearch.cbir.hsvdescriptor import HSVDescriptor
from pyimagesearch.cbir.searcher import Searcher
import argparse
import imutils
import json
import cv2

# parse the command line arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--index", required=True, help="Path to where the features index will be stored")
ap.add_argument("-q", "--query", required=True, help="Path to the query image")
ap.add_argument("-d", "--dataset", required=True, help="Path to the original dataset directory")
ap.add_argument("-r", "--relevant", required=True, help="Path to relevant dictionary")
args = vars(ap.parse_args())

# initialize the image descriptor and results montage
desc = HSVDescriptor((4, 6, 3))
montage = ResultsMontage((240, 320), 5, 20)

# load the relevant queries dictionary and look up the set of
# ground-truth relevant results for the query image
relevant = json.loads(open(args["relevant"]).read())
queryFilename = args["query"][args["query"].rfind("/") + 1:]
queryRelevant = relevant[queryFilename]

# load and describe the query image
query = cv2.imread(args["query"])
print("[INFO] describing query...")
cv2.imshow("Query", imutils.resize(query, width=320))
features = desc.describe(query)

# perform the search
print("[INFO] searching...")
searcher = Searcher(args["index"])
results = searcher.search(features, numResults=20)

# loop over the results, adding each to the montage
for (i, (score, resultID)) in enumerate(results):
    print("[INFO] {result_num}. {result} - {score:.2f}".format(result_num=i + 1, result=resultID, score=score))
    result = cv2.imread("{}/{}".format(args["dataset"], resultID))
    montage.addResult(result, text="#{}".format(i + 1), highlight=resultID in queryRelevant)

# display the output montage and save it to disk
cv2.imshow("Results", imutils.resize(montage.montage, height=700))
cv2.imwrite("mo.png", montage.montage)
cv2.waitKey(0)
index.py
from __future__ import print_function
from pyimagesearch.cbir.hsvdescriptor import HSVDescriptor
from imutils import paths
import progressbar
import argparse
import cv2

# parse the command line arguments
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, help="Path to the directory that contains the images to be indexed")
ap.add_argument("-i", "--index", required=True, help="Path to where the features index will be stored")
args = vars(ap.parse_args())

# initialize the image descriptor and open the output index file
desc = HSVDescriptor((4, 6, 3))
output = open(args["index"], "w")

# grab the image paths and set up the progress bar
imagePaths = list(paths.list_images(args["dataset"]))
widgets = ["Indexing:", progressbar.Percentage(), "", progressbar.Bar(), "", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(imagePaths), widgets=widgets)
pbar.start()

# loop over the images in the dataset
for (i, imagePath) in enumerate(imagePaths):
    # extract the filename from the path and describe the image
    filename = imagePath[imagePath.rfind("/") + 1:]
    image = cv2.imread(imagePath)
    features = desc.describe(image)

    # write the features to the index file as a CSV row
    features = [str(x) for x in features]
    output.write("{}, {}\n".format(filename, ",".join(features)))
    pbar.update(i)

# close the index file
pbar.finish()
print("[INFO] indexed {} images".format(len(imagePaths)))
output.close()
dists.py
import numpy as np

def chi2_distance(histA, histB, eps=1e-10):
    d = 0.5 * np.sum(((histA - histB) ** 2) / (histA + histB + eps))
    return d
hsvdescriptor.py
import numpy as np
import cv2
import imutils

class HSVDescriptor:
    def __init__(self, bins):
        # store the number of bins for the 3D HSV histogram
        self.bins = bins

    def describe(self, image):
        # convert the image to the HSV color space and initialize the features
        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        features = []

        # grab the dimensions and compute the center of the image
        (h, w) = image.shape[:2]
        (cX, cY) = (int(w * 0.5), int(h * 0.5))

        # divide the image into four rectangular segments
        # (top-left, top-right, bottom-right, bottom-left)
        segments = [(0, cX, 0, cY), (cX, w, 0, cY), (cX, w, cY, h), (0, cX, cY, h)]

        # construct an elliptical mask representing the center of the image
        (axesX, axesY) = (int(w * 0.75) // 2, int(h * 0.75) // 2)
        ellipMask = np.zeros(image.shape[:2], dtype="uint8")
        cv2.ellipse(ellipMask, (cX, cY), (axesX, axesY), 0, 0, 360, 255, -1)

        # loop over the segments
        for (startX, endX, startY, endY) in segments:
            # build a mask for the corner region, subtracting the
            # elliptical center from it
            cornerMask = np.zeros(image.shape[:2], dtype="uint8")
            cv2.rectangle(cornerMask, (startX, startY), (endX, endY), 255, -1)
            cornerMask = cv2.subtract(cornerMask, ellipMask)

            # extract a color histogram from the region and update the features
            hist = self.histogram(image, cornerMask)
            features.extend(hist)

        # extract a color histogram from the elliptical center
        hist = self.histogram(image, ellipMask)
        features.extend(hist)

        return np.array(features)

    def histogram(self, image, mask=None):
        # compute a 3D HSV histogram, normalized according to the
        # OpenCV version in use
        hist = cv2.calcHist([image], [0, 1, 2], mask, self.bins, [0, 180, 0, 256, 0, 256])

        if imutils.is_cv2():
            hist = cv2.normalize(hist).flatten()
        else:
            hist = cv2.normalize(hist, hist).flatten()

        return hist
resultsmontage.py
import numpy as np
import cv2

class ResultsMontage:
    def __init__(self, imageSize, imagesPerRow, numResults):
        # store the target tile size and the number of images per row
        self.imageW = imageSize[0]
        self.imageH = imageSize[1]
        self.imagesPerRow = imagesPerRow

        # allocate memory for the output montage image
        numCols = numResults // imagesPerRow
        self.montage = np.zeros((numCols * self.imageW, imagesPerRow * self.imageH, 3), dtype="uint8")

        # initialize the counter and the current (row, col) position
        self.counter = 0
        self.row = 0
        self.col = 0

    def addResult(self, image, text=None, highlight=False):
        # move to the next row once the current row is full
        if self.counter != 0 and self.counter % self.imagesPerRow == 0:
            self.col = 0
            self.row += 1

        # resize the image and place it into its (row, col) cell
        image = cv2.resize(image, (self.imageH, self.imageW))
        (startY, endY) = (self.row * self.imageW, (self.row + 1) * self.imageW)
        (startX, endX) = (self.col * self.imageH, (self.col + 1) * self.imageH)
        self.montage[startY:endY, startX:endX] = image

        # draw the (optional) text on the result
        if text is not None:
            cv2.putText(self.montage, text, (startX + 10, startY + 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3)

        # draw a green border around relevant (ground-truth) results
        if highlight:
            cv2.rectangle(self.montage, (startX + 3, startY + 3), (endX - 3, endY - 3), (0, 255, 0), 4)

        # update the column position and image counter
        self.col += 1
        self.counter += 1
searcher.py
from . import dists
import csv

class Searcher:
    def __init__(self, dbPath):
        # store the path to the CSV feature index
        self.dbPath = dbPath

    def search(self, queryFeatures, numResults=10):
        # initialize the results dictionary
        results = {}

        # open the index file and loop over its rows
        with open(self.dbPath) as f:
            reader = csv.reader(f)

            for row in reader:
                # parse the feature vector and compare it to the query
                features = [float(x) for x in row[1:]]
                d = dists.chi2_distance(features, queryFeatures)
                results[row[0]] = d

        # sort the results by distance (smaller means more similar)
        results = sorted([(v, k) for (k, v) in results.items()])
        return results[:numResults]
2. Feature extraction and indexing
1. Three aspects of an image you can describe:
- Color: image descriptors that characterize color attempt to model the distribution of pixel intensities in each channel of the image. These methods include basic color statistics, such as the mean, standard deviation, and skewness, along with color histograms, both "flat" and multi-dimensional.
- Texture: texture descriptors attempt to model the feel, appearance, and overall tactile quality of objects in an image. Some (but not all) texture descriptors convert the image to grayscale, compute a gray-level co-occurrence matrix (GLCM), and derive statistics from that matrix, including contrast, correlation, and entropy (Haralick texture features). More advanced texture descriptors, such as Local Binary Patterns, attempt to model local patterns as well. Even more advanced descriptors, such as Fourier and wavelet transforms, also exist, but still operate on grayscale images.
- Shape: the vast majority of shape descriptor methods rely on extracting the outline (i.e., the contour) of an object in the image. Once we have the contour, we can compute simple statistics to characterize it, which is exactly what Hu Moments and Zernike Moments do. These statistics can be used to represent the shape (outline) of an object in an image. In the context of machine learning and object recognition, Histogram of Oriented Gradients (HOG) is also a good choice. A small Hu Moments sketch follows this list.
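As a concrete shape-descriptor example, here is a minimal sketch that computes Hu Moments for the largest contour in an image. The thresholding setup and the "example.png" path are illustrative assumptions about the input:

import cv2
import imutils

# load the image, convert it to grayscale, and threshold it so the
# object appears as white on a black background (an assumed setup)
image = cv2.imread("example.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

# find the largest outline (contour) of the object
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
c = max(cnts, key=cv2.contourArea)

# compute the 7 Hu Moments -- a compact characterization of the contour's
# shape that is invariant to rotation, scale, and translation
huMoments = cv2.HuMoments(cv2.moments(c)).flatten()
print(huMoments)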
2. Definition of feature extraction
Definition: feature extraction is the process of quantifying a dataset by applying an image descriptor to every image in that dataset. Typically these features are stored on disk for later use and indexed with a specialized data structure (such as an inverted index, a kd-tree, or a random projection forest) to speed up queries, as sketched below.
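To make the "specialized data structure" point concrete, here is a small sketch that indexes feature vectors with a kd-tree using scipy; the random vectors stand in for real extracted features:

from scipy.spatial import cKDTree
import numpy as np

# stand-in for a dataset of 1,000 extracted feature vectors of dimension 128
np.random.seed(42)
features = np.random.uniform(size=(1000, 128))

# build the kd-tree once at indexing time...
tree = cKDTree(features)

# ...then answer "k nearest neighbors" queries quickly at search time
query = np.random.uniform(size=(128,))
(neighborDists, neighborIdxs) = tree.query(query, k=5)
print(neighborIdxs)

Note that kd-trees degrade in very high dimensions, which is one reason approximate structures such as random projection forests are also mentioned above.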
3. Defining similarity
1. Common distance metrics (a short demo comparing them follows this list):
Euclidean:
from scipy.spatial import distance as dists
dists.euclidean(A, B)
Manhattan / city block:
dists.cityblock(A, B)
Histogram intersection:
import numpy as np

def histogram_intersection(H1, H2):
    return np.sum(np.minimum(H1, H2))
χ² (chi-squared) distance:
def chi2_distance(histA, histB, eps=1e-10):
    return 0.5 * np.sum(((histA - histB) ** 2) / (histA + histB + eps))

chi2_distance(A, B)
Cosine:
dists.cosine(A, B)
Hamming:
dists.hamming(A, B)
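A quick demo tying the metrics above together; A and B here are toy normalized histograms, not real image features:

from scipy.spatial import distance as dists
import numpy as np

def histogram_intersection(H1, H2):
    return np.sum(np.minimum(H1, H2))

def chi2_distance(histA, histB, eps=1e-10):
    return 0.5 * np.sum(((histA - histB) ** 2) / (histA + histB + eps))

# two toy L1-normalized histograms
np.random.seed(42)
A = np.random.uniform(size=32)
B = np.random.uniform(size=32)
(A, B) = (A / A.sum(), B / B.sum())

print("euclidean:", dists.euclidean(A, B))
print("cityblock:", dists.cityblock(A, B))
print("intersection:", histogram_intersection(A, B))  # a similarity: higher means closer
print("chi-squared:", chi2_distance(A, B))
print("cosine:", dists.cosine(A, B))
print("hamming:", dists.hamming(A > A.mean(), B > B.mean()))  # hamming expects binary vectors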
4. Extracting keypoints and local invariant descriptors
1. File structure and purpose:
|--- pyimagesearch
# coding=utf-8
from __future__ import print_function
from pyimagesearch.descriptors.detectanddescribe import DetectAndDescribe
from pyimagesearch.indexer.featureindexer import FeatureIndexer
from imutils.feature import FeatureDetector_create, DescriptorExtractor_create
from imutils import paths
import argparse
import imutils
import cv2

ap = argparse.ArgumentParser()
# path to the directory of images to be indexed
ap.add_argument("-d", "--dataset", required=True, help="Path to the directory that contains the images to be indexed")
# path to where the HDF5 features database will be stored on disk
ap.add_argument("-f", "--features-db", required=True, help="Path to where the features database will be stored")
# (optional) approximate number of images in the dataset
ap.add_argument("-a", "--approx-images", type=int, default=500, help="Approximate # of images in the dataset")
# writing feature vectors to HDF5 one at a time is very inefficient; instead,
# feature vectors are collected in a large in-memory array that is dumped to
# HDF5 whenever it fills up -- this switch controls how many feature vectors
# can be held in memory before the buffer is flushed
ap.add_argument("-b", "--max-buffer-size", type=int, default=50000, help="Maximum buffer size for # of features to be stored in memory")
args = vars(ap.parse_args())

# initialize the keypoint detector, the descriptor extractor, and the
# wrapper that ties them together
detector = FeatureDetector_create("SURF")
descriptor = DescriptorExtractor_create("RootSIFT")
dad = DetectAndDescribe(detector, descriptor)

# initialize the feature indexer
fi = FeatureIndexer(args["features_db"], estNumImages=args["approx_images"], maxBufferSize=args["max_buffer_size"], verbose=True)

# loop over the images in the dataset
for (i, imagePath) in enumerate(paths.list_images(args["dataset"])):
    if i > 0 and i % 10 == 0:
        fi._debug("processed {} images".format(i), msgType="[PROGRESS]")

    # load the image, resize it, and convert it to grayscale
    filename = imagePath[imagePath.rfind("/") + 1:]
    image = cv2.imread(imagePath)
    image = imutils.resize(image, width=320)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # describe the image, skipping it if no keypoints were detected
    (kps, descs) = dad.describe(image)

    if kps is None or descs is None:
        continue

    # index the keypoints and features
    fi.add(filename, kps, descs)

# finish the indexing process
fi.finish()
detectanddescribe.py
import numpy as np

class DetectAndDescribe:
    def __init__(self, detector, descriptor):
        # store the keypoint detector and local invariant descriptor
        self.detector = detector
        self.descriptor = descriptor

    def describe(self, image, useKpList=True):
        # detect keypoints and extract local invariant descriptors
        kps = self.detector.detect(image)
        (kps, descs) = self.descriptor.compute(image, kps)

        # if no keypoints were found, return None
        if len(kps) == 0:
            return (None, None)

        # convert the keypoints to a NumPy array of (x, y) coordinates
        if useKpList:
            kps = np.int0([kp.pt for kp in kps])

        return (kps, descs)
baseindexer.py
from __future__ import print_function
import numpy as np
import datetime

class BaseIndexer(object):
    def __init__(self, dbPath, estNumImages=500, maxBufferSize=50000, dbResizeFactor=2, verbose=True):
        # store the database path and bookkeeping parameters
        self.dbPath = dbPath
        self.estNumImages = estNumImages
        self.maxBufferSize = maxBufferSize
        self.dbResizeFactor = dbResizeFactor
        self.verbose = verbose
        self.idxs = {}

    def _writeBuffers(self):
        # to be overridden by subclasses
        pass

    def _writeBuffer(self, dataset, datasetName, buf, idxName, sparse=False):
        # determine where the buffer ends in the dataset
        if type(buf) is list:
            end = self.idxs[idxName] + len(buf)
        else:
            end = self.idxs[idxName] + buf.shape[0]

        # expand the dataset if the buffer will not fit
        if end > dataset.shape[0]:
            self._debug("triggering '{}' db resize".format(datasetName))
            self._resizeDataset(dataset, datasetName, baseSize=end)

        # densify a sparse buffer before writing it
        if sparse:
            buf = buf.toarray()

        # write the buffer to the dataset
        self._debug("writing '{}' buffer".format(datasetName))
        dataset[self.idxs[idxName]:end] = buf

    def _resizeDataset(self, dataset, dbName, baseSize=0, finished=0):
        # grab the original size of the dataset
        origSize = dataset.shape[0]

        # compact to the final size if indexing has finished, otherwise
        # grow the dataset by the resize factor
        if finished > 0:
            newSize = finished
        else:
            newSize = baseSize * self.dbResizeFactor

        # resize the dataset along its first axis
        shape = list(dataset.shape)
        shape[0] = newSize
        dataset.resize(tuple(shape))
        self._debug("old size of '{}': {:,}; new size: {:,}".format(dbName, origSize, newSize))

    def _debug(self, msg, msgType="[INFO]"):
        if self.verbose:
            print("{} {} - {}".format(msgType, msg, datetime.datetime.now()))

    @staticmethod
    def featureStack(array, accum=None, stackMethod=np.vstack):
        # initialize or stack the accumulated array
        if accum is None:
            accum = array
        else:
            accum = stackMethod([accum, array])

        return accum
featureindexer.py
# coding=utf-8
from .baseindexer import BaseIndexer
import numpy as np
import h5py
import sys

class FeatureIndexer(BaseIndexer):
    def __init__(self, dbPath, estNumImages=500, maxBufferSize=50000, dbResizeFactor=2, verbose=True):
        # call the parent constructor
        super(FeatureIndexer, self).__init__(dbPath, estNumImages=estNumImages, maxBufferSize=maxBufferSize, dbResizeFactor=dbResizeFactor, verbose=verbose)

        # open the HDF5 database and initialize the datasets and buffers
        self.db = h5py.File(self.dbPath, mode="w")
        self.imageIDDB = None
        self.indexDB = None
        self.featuresDB = None
        self.imageIDBuffer = []
        self.indexBuffer = []
        self.featuresBuffer = None
        self.totalFeatures = 0
        self.idxs = {"index": 0, "features": 0}

    def add(self, imageID, kps, features):
        # compute the starting and ending index of this image's features
        start = self.idxs["features"] + self.totalFeatures
        end = start + len(features)

        # update the buffers, stacking the keypoints and features together
        self.imageIDBuffer.append(imageID)
        self.featuresBuffer = BaseIndexer.featureStack(np.hstack([kps, features]), self.featuresBuffer)
        self.indexBuffer.append((start, end))
        self.totalFeatures += len(features)

        # flush the buffers to disk once they are full
        if self.totalFeatures >= self.maxBufferSize:
            if None in (self.imageIDDB, self.indexDB, self.featuresDB):
                self._debug("initial buffer full")
                self._createDatasets()

            self._writeBuffers()

    def _createDatasets(self):
        # estimate the total number of features in the dataset
        avgFeatures = self.totalFeatures / float(len(self.imageIDBuffer))
        approxFeatures = int(avgFeatures * self.estNumImages)
        fvectorSize = self.featuresBuffer.shape[1]

        # use a variable-length string dtype for the image IDs
        if sys.version_info[0] < 3:
            dt = h5py.special_dtype(vlen=unicode)
        else:
            dt = h5py.special_dtype(vlen=str)

        # create the image IDs, index, and features datasets
        self._debug("creating datasets...")
        self.imageIDDB = self.db.create_dataset("image_ids", (self.estNumImages,), maxshape=(None,), dtype=dt)
        self.indexDB = self.db.create_dataset("index", (self.estNumImages, 2), maxshape=(None, 2), dtype="int")
        self.featuresDB = self.db.create_dataset("features", (approxFeatures, fvectorSize), maxshape=(None, fvectorSize), dtype="float")

    def _writeBuffers(self):
        # write the buffers to disk and advance the dataset indexes
        self._writeBuffer(self.imageIDDB, "image_ids", self.imageIDBuffer, "index")
        self._writeBuffer(self.indexDB, "index", self.indexBuffer, "index")
        self._writeBuffer(self.featuresDB, "features", self.featuresBuffer, "features")
        self.idxs["index"] += len(self.imageIDBuffer)
        self.idxs["features"] += self.totalFeatures

        # reset the buffers
        self.imageIDBuffer = []
        self.indexBuffer = []
        self.featuresBuffer = None
        self.totalFeatures = 0

    def finish(self):
        # create the datasets if the initial buffer was never filled
        if None in (self.imageIDDB, self.indexDB, self.featuresDB):
            self._debug("minimum init buffer not reached", msgType="[WARN]")
            self._createDatasets()

        # flush any remaining buffers, then compact the datasets
        self._debug("writing un-empty buffers...")
        self._writeBuffers()
        self._debug("compacting datasets...")
        self._resizeDataset(self.imageIDDB, "image_ids", finished=self.idxs["index"])
        self._resizeDataset(self.indexDB, "index", finished=self.idxs["index"])
        self._resizeDataset(self.featuresDB, "features", finished=self.idxs["features"])
        self.db.close()
5. Clustering features to form a codebook
1. File structure and purpose
Two new files are added: cluster_features.py and vocabulary.py
|--- pyimagesearch
cluster_features.py
# coding=utf-8
from __future__ import print_function
from pyimagesearch.ir.vocabulary import Vocabulary
import argparse
import pickle

ap = argparse.ArgumentParser()
ap.add_argument("-f", "--features-db", required=True, help="Path to where the features database will be stored")
ap.add_argument("-c", "--codebook", required=True, help="Path to the output codebook")
# the number of clusters (i.e., visual words) mini-batch k-means will generate
ap.add_argument("-k", "--clusters", type=int, default=64, help="# of clusters to generate")
# controls the size of the feature vector sample used for clustering
ap.add_argument("-p", "--percentage", type=float, default=0.25, help="Percentage of total features to use when clustering")
args = vars(ap.parse_args())

# obtain the generated visual words -- the entire sampling and clustering
# process is abstracted away by the Vocabulary class
voc = Vocabulary(args["features_db"])
vocab = voc.fit(args["clusters"], args["percentage"])

# serialize the cluster centers (the codebook) to disk
print("[INFO] storing cluster centers...")
f = open(args["codebook"], "wb")
f.write(pickle.dumps(vocab))
f.close()
vocabulary.py
from __future__ import print_function
from sklearn.cluster import MiniBatchKMeans
import numpy as np
import datetime
import h5py

class Vocabulary:
    def __init__(self, dbPath, verbose=True):
        self.dbPath = dbPath
        self.verbose = verbose

    def fit(self, numClusters, samplePercent, randomState=None):
        # open the features database and determine the sample size
        db = h5py.File(self.dbPath, mode="r")
        totalFeatures = db["features"].shape[0]
        sampleSize = int(np.ceil(samplePercent * totalFeatures))

        # randomly sample (without replacement) the feature vector indexes
        idxs = np.random.choice(np.arange(0, totalFeatures), (sampleSize), replace=False)
        idxs.sort()
        data = []
        self._debug("starting sampling...")

        # gather the sampled features, skipping the (x, y) keypoint columns
        for i in idxs:
            data.append(db["features"][i][2:])

        # cluster the sampled features using mini-batch k-means
        self._debug("sampled {:,} features from a population of {:,}".format(len(idxs), totalFeatures))
        self._debug("clustering with k={:,}".format(numClusters))
        clt = MiniBatchKMeans(n_clusters=numClusters, random_state=randomState)
        clt.fit(data)
        self._debug("cluster shape: {}".format(clt.cluster_centers_.shape))

        # close the database and return the cluster centers (the codebook)
        db.close()
        return clt.cluster_centers_

    def _debug(self, msg, msgType="[INFO]"):
        if self.verbose:
            print("{} {} - {}".format(msgType, msg, datetime.datetime.now()))
6. Visualizing the words in the codebook
Note: this step can only be run after the keypoints and local invariant descriptors have been extracted and the codebook has been built by clustering.
Purpose: visualize each visual word (k-means cluster center) in the codebook by collecting the image patches closest to it and saving them as a montage.
visualize_centers.py
# coding=utf-8
from __future__ import print_function
from pyimagesearch.resultsmontage import ResultsMontage
from sklearn.metrics import pairwise
import numpy as np
import progressbar
import argparse
import pickle
import h5py
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, help="Path to the directory of indexed images")
# path to the database of extracted keypoints and local invariant descriptors
ap.add_argument("-f", "--features-db", required=True, help="Path to the features database")
# path to the codebook of visual words to visualize
ap.add_argument("-c", "--codebook", required=True, help="Path to the codebook")
ap.add_argument("-o", "--output", required=True, help="Path to output directory")
args = vars(ap.parse_args())

# load the codebook and open the features database
vocab = pickle.loads(open(args["codebook"], "rb").read())
featuresDB = h5py.File(args["features_db"], mode="r")
print("[INFO] starting distance computations...")

# initialize the visualization dictionary and the progress bar
vis = {i: [] for i in np.arange(0, len(vocab))}
widgets = ["Comparing:", progressbar.Percentage(), "", progressbar.Bar(), "", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=featuresDB["image_ids"].shape[0], widgets=widgets).start()

# loop over the image IDs
for (i, imageID) in enumerate(featuresDB["image_ids"]):
    # grab the rows (keypoints + features) belonging to this image
    (start, end) = featuresDB["index"][i]
    rows = featuresDB["features"][start:end]
    (kps, descs) = (rows[:, :2], rows[:, 2:])

    # loop over the keypoints and features
    for (kp, features) in zip(kps, descs):
        # compute the distance between the feature and every visual word
        features = features.reshape(1, -1)
        D = pairwise.euclidean_distances(features, Y=vocab)[0]

        # keep only the 16 closest patches for each visual word
        for j in np.arange(0, len(vocab)):
            topResults = vis.get(j)
            topResults.append((D[j], kp, imageID))
            topResults = sorted(topResults, key=lambda r: r[0])[:16]
            vis[j] = topResults

    pbar.update(i)

pbar.finish()
featuresDB.close()
print("[INFO] writing visualizations to file...")

# loop over the visual words and their top results
for (vwID, results) in vis.items():
    # build a montage of the 16 closest patches for this visual word
    montage = ResultsMontage((64, 64), 4, 16)

    for (_, (x, y), imageID) in results:
        # load the image and extract a 64x64 region around the keypoint
        p = "{}/{}".format(args["dataset"], imageID)
        image = cv2.imread(p)
        (h, w) = image.shape[:2]
        (startX, endX) = (max(0, int(x) - 32), min(w, int(x) + 32))
        (startY, endY) = (max(0, int(y) - 32), min(h, int(y) + 32))
        roi = image[startY:endY, startX:endX]
        montage.addResult(roi)

    # write the montage for this visual word to disk
    p = "{}/vis_{}.jpg".format(args["output"], vwID)
    cv2.imwrite(p, cv2.cvtColor(montage.montage, cv2.COLOR_BGR2GRAY))
7. Vector quantization
7.1 From many features to a single histogram
bagofvisualwords.py
from sklearn.metrics import pairwise
from scipy.sparse import csr_matrix
import numpy as np

class BagOfVisualWords:
    def __init__(self, codebook, sparse=True):
        # store the codebook and whether sparse histograms should be produced
        self.codebook = codebook
        self.sparse = sparse

    def describe(self, features):
        # compute the distance from each feature to each visual word,
        # then count how often each word is the closest one
        D = pairwise.euclidean_distances(features, Y=self.codebook)
        (words, counts) = np.unique(np.argmin(D, axis=1), return_counts=True)

        # build either a sparse or dense histogram of visual word counts
        if self.sparse:
            hist = csr_matrix((counts, (np.zeros((len(words),)), words)), shape=(1, len(self.codebook)), dtype="float")
        else:
            hist = np.zeros((len(self.codebook),), dtype="float")
            hist[words] = counts

        return hist
quantize_example.py
from __future__ import print_function
from pyimagesearch.ir.bagofvisualwords import BagOfVisualWords
from sklearn.metrics import pairwise
import numpy as np

# generate a toy vocabulary of 3 visual words and 10 random feature vectors
np.random.seed(42)
vocab = np.random.uniform(size=(3, 6))
features = np.random.uniform(size=(10, 6))
print("[INFO] vocabulary:\n{}\n".format(vocab))
print("[INFO] features:\n{}\n".format(features))

# quantize the features by hand: assign each one to its closest visual word
hist = np.zeros((3,), dtype="int32")

for (i, f) in enumerate(features):
    D = pairwise.euclidean_distances(f.reshape(1, -1), Y=vocab)
    j = np.argmin(D)
    print("[INFO] Closest visual word to feature #{}: {}".format(i, j))
    hist[j] += 1
    print("[INFO] Updated histogram: {}".format(hist))

# the BagOfVisualWords class produces the same histogram in one call
bovw = BagOfVisualWords(vocab, sparse=False)
hist = bovw.describe(features)
print("[INFO] BOVW histogram: {}".format(hist))
7.2 Forming the BOVW representation
Run command: python extract_bovw.py --features-db output/features.hdf5 --codebook output/vocab.cpickle --bovw-db output/bovw.hdf5 --idf output/idf.cpickle
extract_bovw.py
from pyimagesearch.ir.bagofvisualwords import BagOfVisualWords
from pyimagesearch.indexer.bovwindexer import BOVWIndexer
import argparse
import pickle
import h5py

ap = argparse.ArgumentParser()
# path to the HDF5 database built in the keypoints and local invariant
# descriptors lesson; it contains the image IDs, index, and raw
# keypoint/feature vectors for every image in the dataset
ap.add_argument("-f", "--features-db", required=True, help="Path to the features database")
# path to the visual codebook
ap.add_argument("-c", "--codebook", required=True, help="Path to the codebook")
# path to a separate HDF5 database where the BOVW representations will be stored
ap.add_argument("-b", "--bovw-db", required=True, help="Path to where the bag-of-visual-words database will be stored")
ap.add_argument("-d", "--idf", required=True, help="Path to inverse document frequency counts will be stored")
# just as a buffer of raw feature vectors was kept in memory before writing
# them to HDF5, here a buffer of BOVW histograms is managed the same way
ap.add_argument("-s", "--max-buffer-size", type=int, default=500, help="Maximum buffer size for # of features to be stored in memory")
args = vars(ap.parse_args())

# load the codebook and initialize the bag-of-visual-words transformer
vocab = pickle.loads(open(args["codebook"], "rb").read())
bovw = BagOfVisualWords(vocab)

# open the features database and initialize the BOVW indexer
featuresDB = h5py.File(args["features_db"], mode="r")
bi = BOVWIndexer(bovw.codebook.shape[0], args["bovw_db"], estNumImages=featuresDB["image_ids"].shape[0], maxBufferSize=args["max_buffer_size"])

# loop over the image IDs and their feature offsets
for (i, (imageID, offset)) in enumerate(zip(featuresDB["image_ids"], featuresDB["index"])):
    if i > 0 and i % 10 == 0:
        bi._debug("processed {} images".format(i), msgType="[PROGRESS]")

    # grab the image's features (dropping the keypoint columns) and
    # quantize them into a BOVW histogram
    features = featuresDB["features"][offset[0]:offset[1]][:, 2:]
    hist = bovw.describe(features)
    bi.add(hist)

# close the features database, finish indexing, and serialize the
# inverse document frequency counts to disk
featuresDB.close()
bi.finish()
f = open(args["idf"], "wb")
f.write(pickle.dumps(bi.df(method="idf")))
f.close()
bovwindexer.py
from .baseindexer import BaseIndexer
from scipy import sparse
import numpy as np
import h5py

class BOVWIndexer(BaseIndexer):
    def __init__(self, fvectorSize, dbPath, estNumImages=500, maxBufferSize=500, dbResizeFactor=2, verbose=True):
        # call the parent constructor
        super(BOVWIndexer, self).__init__(dbPath, estNumImages=estNumImages, maxBufferSize=maxBufferSize, dbResizeFactor=dbResizeFactor, verbose=verbose)

        # open the HDF5 database and initialize the BOVW dataset and buffer
        self.db = h5py.File(self.dbPath, mode="w")
        self.bovwDB = None
        self.bovwBuffer = None
        self.idxs = {"bovw": 0}

        # store the histogram size, then initialize the document frequency
        # counts and the total number of images
        self.fvectorSize = fvectorSize
        self._df = np.zeros((fvectorSize,), dtype="float")
        self.totalImages = 0

    def add(self, hist):
        # stack the histogram onto the buffer and update the document
        # frequency count of every visual word present in the image
        self.bovwBuffer = BaseIndexer.featureStack(hist, self.bovwBuffer, stackMethod=sparse.vstack)
        self._df[np.where(hist.toarray()[0] > 0)] += 1

        # flush the buffer to disk once it is full
        if self.bovwBuffer.shape[0] >= self.maxBufferSize:
            if self.bovwDB is None:
                self._debug("initial buffer full")
                self._createDatasets()

            self._writeBuffers()

    def _writeBuffers(self):
        # write the (sparse) buffer to disk and reset it
        if self.bovwBuffer is not None and self.bovwBuffer.shape[0] > 0:
            self._writeBuffer(self.bovwDB, "bovw", self.bovwBuffer, "bovw", sparse=True)
            self.idxs["bovw"] += self.bovwBuffer.shape[0]
            self.bovwBuffer = None

    def _createDatasets(self):
        self._debug("creating datasets...")
        self.bovwDB = self.db.create_dataset("bovw", (self.estNumImages, self.fvectorSize), maxshape=(None, self.fvectorSize), dtype="float")

    def finish(self):
        # create the dataset if the initial buffer was never filled
        if self.bovwDB is None:
            self._debug("minimum init buffer not reached", msgType="[WARN]")
            self._createDatasets()

        # flush any remaining buffers and compact the dataset
        self._debug("writing un-empty buffers...")
        self._writeBuffers()
        self._debug("compacting datasets...")
        self._resizeDataset(self.bovwDB, "bovw", finished=self.idxs["bovw"])
        self.totalImages = self.bovwDB.shape[0]
        self.db.close()

    def df(self, method=None):
        # compute the inverse document frequency if requested,
        # otherwise return the raw document frequency counts
        if method == "idf":
            return np.log(self.totalImages / (1.0 + self._df))

        return self._df
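The df(method="idf") call above implements standard inverse document frequency weighting. A toy example of the same formula, with made-up counts:

import numpy as np

# toy example: 4 indexed images, document frequency counts for 3 visual words
totalImages = 4
df = np.array([4.0, 2.0, 0.0])  # word 0 appears in every image, word 2 in none

# same formula as BOVWIndexer.df(method="idf")
idf = np.log(totalImages / (1.0 + df))
print(idf)  # common words get low (even negative) weight, rare words get high weight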
8. Inverted indexing and searching
8.1 Building the inverted index
1. File structure
|--- pyimagesearch
redisqueue.py
import numpy as np

class RedisQueue:
    def __init__(self, redisDB):
        # store the Redis database connection
        self.redisDB = redisDB

    def add(self, imageIdx, hist):
        # imageIdx: the index of the image in the HDF5 image_ids dataset
        # hist: the BOVW histogram extracted from the image
        p = self.redisDB.pipeline()

        # for every visual word present in the image, push the image index
        # onto that word's Redis list -- this is the inverted index
        for i in np.where(hist > 0)[0]:
            p.rpush("vw:{}".format(i), imageIdx)

        p.execute()

    def finish(self):
        # force Redis to persist the database to disk
        self.redisDB.save()
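To see what RedisQueue builds without standing up a Redis server, here is an equivalent in-memory sketch; a plain dict stands in for the "vw:{i}" Redis lists, and the toy histograms are made up:

import numpy as np

# toy BOVW histograms for 3 images over a 5-word vocabulary
hists = np.array([
    [2, 0, 1, 0, 0],  # image 0 contains words 0 and 2
    [0, 3, 1, 0, 0],  # image 1 contains words 1 and 2
    [0, 0, 0, 4, 1],  # image 2 contains words 3 and 4
])

# the inverted index maps each visual word to the images containing it
invertedIndex = {}

for (imageIdx, hist) in enumerate(hists):
    for i in np.where(hist > 0)[0]:
        invertedIndex.setdefault(int(i), []).append(imageIdx)

print(invertedIndex)  # {0: [0], 2: [0, 1], 1: [1], 3: [2], 4: [2]}

At query time, only images that share at least one visual word with the query need to be scored, which is exactly how buildCandidates in the searcher below prunes the search.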
build_redis_index.py
from __future__ import print_function
from pyimagesearch.db.redisqueue import RedisQueue
from redis import Redis
import h5py
import argparse

ap = argparse.ArgumentParser()
ap.add_argument("-b", "--bovw-db", required=True, help="Path to the bag-of-visual-words database")
args = vars(ap.parse_args())

# connect to Redis and initialize the inverted index queue
redisDB = Redis(host="localhost", port=6379, db=0)
rq = RedisQueue(redisDB)

# loop over the BOVW histograms, adding each to the inverted index
bovwDB = h5py.File(args["bovw_db"], mode="r")

for (i, hist) in enumerate(bovwDB["bovw"]):
    if i > 0 and i % 10 == 0:
        print("[PROGRESS] processed {} entries".format(i))

    rq.add(i, hist)

# close the database and persist the index
bovwDB.close()
rq.finish()
8.2 Performing the search
Directory structure:
|--- pyimagesearch
Run command: python search.py --dataset ../ukbench --features-db output/features.hdf5 --bovw-db output/bovw.hdf5 --codebook output/vocab.cpickle --relevant ../ukbench/relevant.json --query ../ukbench/ukbench00258.jpg
from __future__ import print_function
from pyimagesearch.descriptors.detectanddescribe import DetectAndDescribe
from pyimagesearch.ir.bagofvisualwords import BagOfVisualWords
from pyimagesearch.ir.searcher import Searcher
from pyimagesearch.ir.dists import chi2_distance
from pyimagesearch.resultsmontage import ResultsMontage
from scipy.spatial import distance
from redis import Redis
from imutils.feature import FeatureDetector_create, DescriptorExtractor_create
import argparse
import pickle
import imutils
import json
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, help="Path to the directory of indexed images")
ap.add_argument("-f", "--features-db", required=True, help="Path to the features database")
ap.add_argument("-b", "--bovw-db", required=True, help="Path to the bag-of-visual-words database")
ap.add_argument("-c", "--codebook", required=True, help="Path to the codebook")
ap.add_argument("-i", "--idf", type=str, help="Path to inverted document frequencies array")
ap.add_argument("-r", "--relevant", required=True, help="Path to relevant dictionary")
ap.add_argument("-q", "--query", required=True, help="Path to the query image")
args = vars(ap.parse_args())

# initialize the keypoint detector, descriptor extractor, descriptor
# pipeline, and distance metric
detector = FeatureDetector_create("SURF")
descriptor = DescriptorExtractor_create("RootSIFT")
dad = DetectAndDescribe(detector, descriptor)
distanceMetric = chi2_distance

# load the inverse document frequency array, if one was supplied
idf = None

if args["idf"] is not None:
    idf = pickle.loads(open(args["idf"], "rb").read())

# load the codebook and initialize the bag-of-visual-words transformer
vocab = pickle.loads(open(args["codebook"], "rb").read())
bovw = BagOfVisualWords(vocab)

# load the relevant queries dictionary and look up the set of
# ground-truth relevant results for the query image
relevant = json.loads(open(args["relevant"]).read())
queryFilename = args["query"][args["query"].rfind("/") + 1:]
queryRelevant = relevant[queryFilename]

# load, resize, and describe the query image, then quantize its
# features into a (sparse) BOVW histogram
queryImage = cv2.imread(args["query"])
queryImage = imutils.resize(queryImage, width=320)
queryImage = cv2.cvtColor(queryImage, cv2.COLOR_BGR2GRAY)
(_, descs) = dad.describe(queryImage)
hist = bovw.describe(descs).tocoo()

# connect to Redis and perform the search
redisDB = Redis(host="localhost", port=6379, db=0)
searcher = Searcher(redisDB, args["bovw_db"], args["features_db"], idf=idf, distanceMetric=distanceMetric)
sr = searcher.search(hist, numResults=20)
print("[INFO] search took: {:.2f}s".format(sr.search_time))

# loop over the results, building the output montage
montage = ResultsMontage((240, 320), 5, 20)

for (i, (score, resultID, resultIdx)) in enumerate(sr.results):
    print("[RESULT] {result_num}. {result} - {score:.2f}".format(result_num=i + 1, result=resultID, score=score))
    result = cv2.imread("{}/{}".format(args["dataset"], resultID))
    montage.addResult(result, text="#{}".format(i + 1), highlight=resultID in queryRelevant)

# show the results and clean up
cv2.imshow("Result", imutils.resize(montage.montage, height=700))
cv2.waitKey(0)
searcher.finish()
searcher.py
from .searchresult import SearchResult
from .dists import chi2_distance
import numpy as np
import datetime
import h5py

class Searcher:
    def __init__(self, redisDB, bovwDBPath, featuresDBPath, idf=None, distanceMetric=chi2_distance):
        # store the Redis connection, IDF array, and distance metric,
        # then open the BOVW and features databases
        self.redisDB = redisDB
        self.idf = idf
        self.distanceMetric = distanceMetric
        self.bovwDB = h5py.File(bovwDBPath, mode="r")
        self.featuresDB = h5py.File(featuresDBPath, "r")

    def search(self, queryHist, numResults=10, maxCandidates=200):
        # start the timer and grab the candidate image indexes from
        # the inverted index
        startTime = datetime.datetime.now()
        candidateIdxs = self.buildCandidates(queryHist, maxCandidates)
        candidateIdxs.sort()

        # grab the BOVW histograms for the candidates and densify the query
        hists = self.bovwDB["bovw"][candidateIdxs]
        queryHist = queryHist.toarray()
        results = {}

        # apply IDF weighting to the query, if supplied
        if self.idf is not None:
            queryHist *= self.idf

        # loop over the candidates, computing the distance to the query
        for (candidate, hist) in zip(candidateIdxs, hists):
            if self.idf is not None:
                hist *= self.idf

            d = self.distanceMetric(hist, queryHist)
            results[candidate] = d

        # sort the results by distance and keep only the top numResults
        results = sorted([(v, self.featuresDB["image_ids"][k], k) for (k, v) in results.items()])
        results = results[:numResults]

        return SearchResult(results, (datetime.datetime.now() - startTime).total_seconds())

    def buildCandidates(self, hist, maxCandidates):
        # query the inverted index for every visual word in the histogram
        p = self.redisDB.pipeline()

        for i in hist.col:
            p.lrange("vw:{}".format(i), 0, -1)

        pipelineResults = p.execute()
        candidates = []

        # collect the image indexes returned for each visual word
        for results in pipelineResults:
            results = [int(r) for r in results]
            candidates.extend(results)

        # rank candidates by how many visual words they share with the
        # query and keep the top maxCandidates
        (imageIdxs, counts) = np.unique(candidates, return_counts=True)
        imageIdxs = [i for (c, i) in sorted(zip(counts, imageIdxs), reverse=True)]

        return imageIdxs[:maxCandidates]

    def finish(self):
        # close the open HDF5 databases
        self.bovwDB.close()
        self.featuresDB.close()
dists.py
import numpy as np

def chi2_distance(histA, histB, eps=1e-10):
    d = 0.5 * np.sum(((histA - histB) ** 2) / (histA + histB + eps))
    return d
searchresult.py
from collections import namedtuple
SearchResult = namedtuple("SearchResult", ["results", "search_time"])
9. Evaluation
evaluate.py
from __future__ import print_function
from pyimagesearch.descriptors.detectanddescribe import DetectAndDescribe
from pyimagesearch.ir.bagofvisualwords import BagOfVisualWords
from pyimagesearch.ir.searcher import Searcher
from pyimagesearch.ir.dists import chi2_distance
from scipy.spatial import distance
from redis import Redis
from imutils.feature import FeatureDetector_create, DescriptorExtractor_create
import numpy as np
import progressbar
import argparse
import pickle
import imutils
import json
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, help="Path to the directory of indexed images")
ap.add_argument("-f", "--features-db", required=True, help="Path to the features database")
ap.add_argument("-b", "--bovw-db", required=True, help="Path to the bag-of-visual-words database")
ap.add_argument("-c", "--codebook", required=True, help="Path to the codebook")
ap.add_argument("-i", "--idf", type=str, help="Path to inverted document frequencies array")
ap.add_argument("-r", "--relevant", required=True, help="Path to relevant dictionary")
args = vars(ap.parse_args())

# initialize the descriptor pipeline and the default distance metric
detector = FeatureDetector_create("SURF")
descriptor = DescriptorExtractor_create("RootSIFT")
dad = DetectAndDescribe(detector, descriptor)
distanceMetric = chi2_distance

# if IDF weighting is used, switch to the cosine distance
idf = None

if args["idf"] is not None:
    idf = pickle.loads(open(args["idf"], "rb").read())
    distanceMetric = distance.cosine

# load the codebook, connect to Redis, and initialize the searcher
vocab = pickle.loads(open(args["codebook"], "rb").read())
bovw = BagOfVisualWords(vocab)
redisDB = Redis(host="localhost", port=6379, db=0)
searcher = Searcher(redisDB, args["bovw_db"], args["features_db"], idf=idf, distanceMetric=distanceMetric)

# load the relevant queries dictionary and initialize the bookkeeping lists
relevant = json.loads(open(args["relevant"]).read())
queryIDs = relevant.keys()
accuracies = []
timings = []

widgets = ["Evaluating:", progressbar.Percentage(), "", progressbar.Bar(), "", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(queryIDs), widgets=widgets).start()

# loop over the query image IDs
for (i, queryID) in enumerate(sorted(queryIDs)):
    # look up the ground-truth relevant results and load the query image
    queryRelevant = relevant[queryID]
    p = "{}/{}".format(args["dataset"], queryID)
    queryImage = cv2.imread(p)
    queryImage = imutils.resize(queryImage, width=320)
    queryImage = cv2.cvtColor(queryImage, cv2.COLOR_BGR2GRAY)

    # describe the query and quantize it into a BOVW histogram
    (_, descs) = dad.describe(queryImage)
    hist = bovw.describe(descs).tocoo()

    # search, then count how many of the top 4 results are relevant
    sr = searcher.search(hist, numResults=4)
    results = set([r[1] for r in sr.results])
    inter = results.intersection(queryRelevant)
    accuracies.append(len(inter))
    timings.append(sr.search_time)
    pbar.update(i)

# report the mean and standard deviation of accuracy and search time
searcher.finish()
pbar.finish()
accuracies = np.array(accuracies)
timings = np.array(timings)
print("[INFO] ACCURACY: mu={:.2f}, sigma={:.2f}".format(accuracies.mean(), accuracies.std()))
print("[INFO] TIMINGS: mu={:.2f}, sigma={:.2f}".format(timings.mean(), timings.std()))