SIFT 特征包括兴趣点检测器和描述子。SIFT 描述子具有非常强的稳健性,这在很大程度上也是 SIFT 特征能够成功和流行的主要原因。
当我们在观察图像中的物体时,物体所在的图像不论背景或者角度如何变换,我们依然能很快的识别出图像中的目标物体。对机器来说,则需要定义图像上的关键特征并且能够根据这些特征搜索到一张新的图片。
SIFT,即尺度不变特征变换(Scale-invariant feature transform,SIFT)。是在计算机视觉任务中特征提取算法。
这种描述具有尺度不变性,可在图像中检测出关键点,是一种局部特征描述子。SIFT特征是基于物体上的一些局部外观的兴趣点而与影像的大小和旋转无关。可以拿来处理目标的旋转、缩放、平移(RST)、图像仿射/投影变换(视点viewpoint)、弱光照影响、部分目标遮挡(occlusion)、杂物场景(clutter)等问题
SIFT 特征使用高斯差分函数来定位兴趣点:
D(x,σ)=[Gκσ(x)-Gσ(x)]*I(x)=[Gκσ-Gσ]*I=Iκσ-Iσ
其中,Gσ 是上一章中介绍的二维高斯核,Iσ 是使用 Gσ 模糊的灰度图像,κ 是决定相差尺度的常数。兴趣点是在图像位置和尺度变化下 D(x,σ) 的最大值和最小值点。这些候选位置点通过滤波去除不稳定点。基于一些准则,比如认为低对比度和位于边上的点不是兴趣点,我们可以去除一些候选兴趣点。
上面讨论的兴趣点(关键点)位置描述子给出了兴趣点的位置和尺度信息。为了实现旋转不变性,基于每个点周围图像梯度的方向和大小,SIFT 描述子又引入了参考方向。SIFT 描述子使用主方向描述参考方向。主方向使用方向直方图(以大小为权重)来度量。
对于将一幅图像中的特征匹配到另一幅图像的特征,一种稳健的准则(同样是由Lowe 提出的)是使用这两个特征距离和两个最匹配特征距离的比率。相比于图像中的其他特征,该准则保证能够找到足够相似的唯一特征。使用该方法可以使错误的匹配数降低。
from array import array
from PIL import Image
from pylab import *
import sys
from PCV.localdescriptors import sift
if len(sys.argv) >= 3:
im1f, im2f = sys.argv[1], sys.argv[2]
else:
im1f = r'H:\第二章\building22.jpg'
im2f = r'H:\第二章\building23.jpg'
im1 = array(Image.open(im1f))
im2 = array(Image.open(im2f))
sift.process_image(im1f, 'out_sift_1.txt')
l1, d1 = sift.read_features_from_file('out_sift_1.txt')
figure()
gray()
subplot(121)
sift.plot_features(im1, l1, circle=False)
sift.process_image(im2f, 'out_sift_2.txt')
l2, d2 = sift.read_features_from_file('out_sift_2.txt')
subplot(122)
sift.plot_features(im2, l2, circle=False)
#matches = sift.match(d1, d2)
matches = sift.match_twosided(d1, d2)
print ('{} matches'.format(len(matches.nonzero()[0])))
figure()
gray()
sift.plot_matches(im1, im2, l1, l2, matches, show_below=True)
show()
这里调用的PCV包中的sift.py的代码如下:
from PIL import Image
from numpy import *
from pylab import *
import os
def process_image(imagename,resultname,params="--edge-thresh 10 --peak-thresh 5"):
""" Process an image and save the results in a file. """
if imagename[-3:] != 'pgm':
# create a pgm file
im = Image.open(imagename).convert('L')
im.save('tmp.pgm')
imagename = 'tmp.pgm'
cmmd = str("sift "+imagename+" --output="+resultname+
" "+params)
os.system(cmmd)
print('processed', imagename, 'to', resultname)
def read_features_from_file(filename):
""" Read feature properties and return in matrix form. """
f = loadtxt(filename)
return f[:,:4],f[:,4:] # feature locations, descriptors
def write_features_to_file(filename,locs,desc):
""" Save feature location and descriptor to file. """
savetxt(filename,hstack((locs,desc)))
def plot_features(im,locs,circle=False):
""" Show image with features. input: im (image as array),
locs (row, col, scale, orientation of each feature). """
def draw_circle(c,r):
t = arange(0,1.01,.01)*2*pi
x = r*cos(t) + c[0]
y = r*sin(t) + c[1]
plot(x,y,'b',linewidth=2)
imshow(im)
if circle:
for p in locs:
draw_circle(p[:2],p[2])
else:
plot(locs[:,0],locs[:,1],'ob')
axis('off')
def match(desc1,desc2):
""" For each descriptor in the first image,
select its match in the second image.
input: desc1 (descriptors for the first image),
desc2 (same for second image). """
desc1 = array([d/linalg.norm(d) for d in desc1])
desc2 = array([d/linalg.norm(d) for d in desc2])
dist_ratio = 0.6
desc1_size = desc1.shape
matchscores = zeros((desc1_size[0]),'int')
desc2t = desc2.T # precompute matrix transpose
for i in range(desc1_size[0]):
dotprods = dot(desc1[i,:],desc2t) # vector of dot products
dotprods = 0.9999*dotprods
# inverse cosine and sort, return index for features in second image
indx = argsort(arccos(dotprods))
# check if nearest neighbor has angle less than dist_ratio times 2nd
if arccos(dotprods)[indx[0]] < dist_ratio * arccos(dotprods)[indx[1]]:
matchscores[i] = int(indx[0])
return matchscores
def appendimages(im1,im2):
""" Return a new image that appends the two images side-by-side. """
# select the image with the fewest rows and fill in enough empty rows
rows1 = im1.shape[0]
rows2 = im2.shape[0]
if rows1 < rows2:
im1 = concatenate((im1,zeros((rows2-rows1,im1.shape[1]))), axis=0)
elif rows1 > rows2:
im2 = concatenate((im2,zeros((rows1-rows2,im2.shape[1]))), axis=0)
# if none of these cases they are equal, no filling needed.
return concatenate((im1,im2), axis=1)
def plot_matches(im1,im2,locs1,locs2,matchscores,show_below=True):
""" Show a figure with lines joining the accepted matches
input: im1,im2 (images as arrays), locs1,locs2 (location of features),
matchscores (as output from 'match'), show_below (if images should be shown below). """
im3 = appendimages(im1,im2)
if show_below:
im3 = vstack((im3,im3))
# show image
imshow(im3)
# draw lines for matches
cols1 = im1.shape[1]
for i,m in enumerate(matchscores):
if m>0:
plot([locs1[i][0],locs2[m][0]+cols1],[locs1[i][1],locs2[m][1]],'c')
axis('off')
def match_twosided(desc1,desc2):
""" Two-sided symmetric version of match(). """
matches_12 = match(desc1,desc2)
matches_21 = match(desc2,desc1)
ndx_12 = matches_12.nonzero()[0]
# remove matches that are not symmetric
for n in ndx_12:
if matches_21[int(matches_12[n])] != n:
matches_12[n] = 0
return matches_12
download_path = "H:\\第二章\\转存"
path = "H:\\第二章\\转存结果"
imlist = imtools.get_imlist(download_path)
nbr_images = len(imlist)
featlist = [imname[:-3] + 'sift' for imname in imlist]
for i, imname in enumerate(imlist):
sift.process_image(imname, featlist[i])
matchscores = zeros((nbr_images, nbr_images))
for i in range(nbr_images):
for j in range(i, nbr_images): # only compute upper triangle
print('comparing ', imlist[i], imlist[j])
l1, d1 = sift.read_features_from_file(featlist[i])
l2, d2 = sift.read_features_from_file(featlist[j])
matches = sift.match_twosided(d1, d2)
nbr_matches = sum(matches > 0)
print('number of matches = ', nbr_matches)
matchscores[i, j] = nbr_matches
# copy values
for i in range(nbr_images):
for j in range(i + 1, nbr_images): # no need to copy diagonal
matchscores[j, i] = matchscores[i, j]
# 可视化
threshold = 2 # min number of matches needed to craete link
g = pydot.Dot(graph_type='graph') # don't want the default directed graph
for i in range(nbr_images):
for j in range(i + 1, nbr_images):
if matchscores[i, j] > threshold:
# 图像对中的第一幅图像
im = Image.open(imlist[i])
im.thumbnail((100, 100))
filename = path + str(i) + '.png'
im.save(filename) # 需要一定大小的临时文件
g.add_node(pydot.Node(str(i), fontcolor='transparent',
shape='rectangle', image=filename))
# 图像对中的第二幅图像
im = Image.open(imlist[j])
im.thumbnail((100, 100))
filename = path + str(j) + '.png'
im.save(filename) # 需要一定大小的临时文件
g.add_node(pydot.Node(str(j), fontcolor='transparent',
shape='rectangle', image=filename))
g.add_edge(pydot.Edge(str(i), str(j)))
g.write_png('compare.png')
sift.py中match()函数中比例阈值原来是0.6,此时上面输出为7个匹配点
现在修改为0.9后,便有64个匹配点
(此时这里的比例阈值变大后虽然匹配的点数增多,但是如果阈值选择过大会导致匹配的点可靠性下降。所以要保证匹配点的质量就要选择一合理的比例阈值)
小结:总共拍摄了11张图片,将使用局部描述算子对同一地理位置拍摄的图像进行分类。为了对匹配后的图像进行连接可视化,将在一个图中用线将他们链接起来。
SIFT匹配点因为有通过当两幅图像的SIFT特征向量生成后,下一步我们采用关键点特征向量的欧式距离来作为两幅图像中关键点的相似性判定度量。(在调用的sift,py中:首先经过局部描述子匹配,将每对图像间的匹配特征数保存在matchscores数组中。然后使用该matchscores矩阵作为图像间简单的距离度量方式)取图像1中的某个关键点,并找出其与图像2中欧式距离最近的前两个关键点,在这两个关键点中,如果最近的距离除以次近的距离少于某个比例阈值,则接受这一对匹配点。然后进行可视化连接的图像,通过图像间是否具有匹配的局部描述子来定义图像间的连接;SIFT会筛选去掉低对比度或非常靠近边缘的关健点,所以在sift的特征匹配中,虽然可能匹配的点很少但是其准确性会很高