In computer vision, similarity comparison shows up in a great many applications, but its accuracy often falls short of expectations: poor robustness to interference and weak discriminative power are common headaches, and developers frequently spend a lot of time testing how different algorithms behave in their particular application. This article introduces similarity comparison algorithms on three levels: global, local, and semantic. The code assumes input images of size (225, 225); for other sizes, some algorithm parameters may need to be adjusted.
Global comparison algorithms compute a feature over the entire image and then match images using that feature. Common examples are hash-based algorithms, histogram (hist) algorithms, and the structural similarity (SSIM) algorithm.
Hash algorithms work mainly on the image's grayscale values (see the hash reference for details) and include the average hash (aHash), difference hash (dHash), and perceptual hash (pHash); a combined usage sketch follows the cmpHash function below.
import cv2
import numpy as np

def aHash(img, shape=(10, 10)):
    # Resize to 10x10
    img = cv2.resize(img, shape)
    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # s accumulates the pixel sum, hash_str collects the hash bits
    s = 0
    hash_str = ''
    # Sum all pixel values
    for i in range(shape[0]):
        for j in range(shape[1]):
            s = s + gray[i, j]
    # Mean gray level
    avg = s / (shape[0] * shape[1])
    # Pixels above the mean become '1', the rest '0'
    for i in range(shape[0]):
        for j in range(shape[1]):
            if gray[i, j] > avg:
                hash_str = hash_str + '1'
            else:
                hash_str = hash_str + '0'
    return hash_str
def dHash(img, shape=(10, 10)):
    # Resize to 11x10 (one extra column per row for the horizontal difference; assumes a square shape)
    img = cv2.resize(img, (shape[0] + 1, shape[1]))
    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    hash_str = ''
    # In each row, a pixel brighter than its right neighbour becomes '1', otherwise '0'
    for i in range(shape[0]):
        for j in range(shape[1]):
            if gray[i, j] > gray[i, j + 1]:
                hash_str = hash_str + '1'
            else:
                hash_str = hash_str + '0'
    return hash_str
def pHash(img, shape=(10, 10)):
    # Resize to 32x32
    img = cv2.resize(img, (32, 32))  # , interpolation=cv2.INTER_CUBIC
    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Convert to float and apply the DCT
    dct = cv2.dct(np.float32(gray))
    # Keep only the low-frequency coefficients in the top-left corner
    dct_roi = dct[0:shape[0], 0:shape[1]]
    hash_list = []
    average = np.mean(dct_roi)
    # Coefficients above the mean become 1, the rest 0
    for i in range(dct_roi.shape[0]):
        for j in range(dct_roi.shape[1]):
            if dct_roi[i, j] > average:
                hash_list.append(1)
            else:
                hash_list.append(0)
    return hash_list
def cmpHash(hash1, hash2):
    n = 0
    # Different hash lengths mean the arguments are invalid; return -1
    if len(hash1) != len(hash2):
        return -1
    # Count matching positions; the fraction of matches is the similarity
    for i in range(len(hash1)):
        if hash1[i] == hash2[i]:
            n = n + 1
    return n / len(hash1)
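A minimal usage sketch combining the three hash functions with cmpHash, assuming two local test images (the file names img1.jpg and img2.jpg are placeholders):

img1 = cv2.imread('img1.jpg')
img2 = cv2.imread('img2.jpg')
print('ahash:', cmpHash(aHash(img1), aHash(img2)))
print('dhash:', cmpHash(dHash(img1), dHash(img2)))
print('phash:', cmpHash(pHash(img1), pHash(img2)))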
An image's color histogram describes its color distribution. Two different images will usually have noticeably different color distributions, although this approach ignores contour information. Histogram comparison comes in single-channel and multi-channel variants.
def calculate(image1, image2):
    # Single-channel histograms with 256 bins
    hist1 = cv2.calcHist([image1], [0], None, [256], [0.0, 255.0])
    hist2 = cv2.calcHist([image2], [0], None, [256], [0.0, 255.0])
    degree = 0
    # Per-bin overlap: identical bins contribute 1, differing bins their relative overlap
    for i in range(len(hist1)):
        if hist1[i] != hist2[i]:
            degree = degree + (1 - abs(hist1[i] - hist2[i]) / max(hist1[i], hist2[i]))
        else:
            degree = degree + 1
    degree = degree / len(hist1)
    # degree may be a 1-element numpy array; return a plain float
    return float(np.squeeze(degree))
def classify_hist_with_rgb(image1, image2, size=(256, 256)):
    # Resize both images, split into B/G/R channels, and average the per-channel scores
    image1 = cv2.resize(image1, size)
    image2 = cv2.resize(image2, size)
    sub_image1 = cv2.split(image1)
    sub_image2 = cv2.split(image2)
    sub_data = 0
    for im1, im2 in zip(sub_image1, sub_image2):
        sub_data += calculate(im1, im2)
    sub_data = sub_data / 3
    return sub_data
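Reusing img1 and img2 from the hash sketch above, calculate on grayscale inputs gives the single-channel score and classify_hist_with_rgb the multi-channel one:

gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
print('grayhist:', calculate(gray1, gray2))
print('rgbhist:', classify_hist_with_rgb(img1, img2))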
SSIM combines luminance, contrast, and structural information. The algorithm is comparatively involved, and the skimage implementation can be used directly.
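For reference, the standard SSIM index over two aligned windows $x$ and $y$ is

$$\mathrm{SSIM}(x, y) = \frac{(2\mu_x\mu_y + C_1)(2\sigma_{xy} + C_2)}{(\mu_x^2 + \mu_y^2 + C_1)(\sigma_x^2 + \sigma_y^2 + C_2)}$$

where $\mu_x, \mu_y$ are the window means, $\sigma_x^2, \sigma_y^2$ the variances, $\sigma_{xy}$ the covariance, and $C_1, C_2$ small constants that stabilize the division.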
from skimage.metrics import structural_similarity

def ssim(img1, img2):
    # Use an odd window size close to half the image width
    width = img1.shape[1]
    win_size = int(width / 2 - ((width / 2) % 2) + 1)
    # On newer scikit-image versions, replace multichannel=True with channel_axis=-1
    out = structural_similarity(img1, img2, win_size=win_size, multichannel=True)
    return out if out > 0 else 0
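A usage sketch under the same assumptions; both inputs are resized to the (225, 225) size mentioned at the start so that structural_similarity sees images of equal shape:

img1_r = cv2.resize(img1, (225, 225))
img2_r = cv2.resize(img2, (225, 225))
print('ssim:', ssim(img1_r, img2_r))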
Local similarity comparison mainly matches keypoint information such as SIFT or ORB. These features offer a degree of scale and rotation invariance and can cope with images that are shifted relative to each other (see the ORB reference).
Compared with SIFT and similar detectors, ORB is faster and carries no licensing concerns.
def ORB_siml(img1, img2):
    # Initialize the ORB detector
    orb = cv2.ORB_create(nfeatures=200)
    kp1, des1 = orb.detectAndCompute(img1, None)
    kp2, des2 = orb.detectAndCompute(img2, None)
    # No descriptors found in one of the images: no similarity
    if des1 is None or des2 is None:
        return 0
    # Brute-force matcher with Hamming distance between descriptors
    bf = cv2.BFMatcher(cv2.NORM_HAMMING)
    # k-nearest-neighbour matching
    matches = bf.knnMatch(des1, trainDescriptors=des2, k=2)
    if not matches:
        return 0
    # Ratio test plus an absolute distance threshold to drop ambiguous matches
    good = [(m, n) for (m, n) in matches if m.distance < 0.95 * n.distance and m.distance < 70]
    # Optionally draw the matched keypoints
    # img3 = cv2.drawMatchesKnn(img1, kp1, img2, kp2, good, img2, flags=2)
    similary = len(good) / len(matches)
    return similary
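Called the same way as the other metrics, with the same placeholder images:

print('orb:', ORB_siml(img1, img2))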
Semantic-level comparison means comparing the features produced by a pretrained deep-learning model. Because ImageNet-pretrained models are very strong feature extractors, they handle well the case where the image has a single clear subject and the two pictures differ considerably while still belonging to the same class. Below is the conversion and usage process for a MobileNetV2 model.
import torch
import torchvision
import onnx
import onnxruntime as ort
from torchvision import transforms
from PIL import Image
import numpy as np

# Load the pretrained MobileNetV2 and drop its final Linear layer,
# keeping only the Dropout so the model outputs the 1280-d feature vector
mobilev2 = torchvision.models.mobilenet_v2(pretrained=True)
new_classifier = torch.nn.Sequential(*list(mobilev2.children())[-1][:1])
mobilev2.classifier = new_classifier
mobilev2.eval()
torch.save(mobilev2, "./mobilev2_1280.pt")

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
mobilev2_1280 = torch.load('mobilev2_1280.pt')
mobilev2_1280.to(device)
mobilev2_1280.eval()

# Standard ImageNet preprocessing
img_dir = r'/home/whh/whh_train/Classification/000000.jpg'
img = Image.open(img_dir)
trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
cudaimg = trans(img)
cudaimg.unsqueeze_(dim=0)
cudaimg = cudaimg.to(device)
tensorimg = trans(img)
tensorimg = tensorimg[None, :]
# 1280-d feature from the PyTorch model
out_1280 = mobilev2_1280(cudaimg)
Export the model to ONNX and run it with onnxruntime:
torch.onnx.export(mobilev2_1280,         # model being run
                  tensorimg,             # model input (or a tuple for multiple inputs)
                  "mobilev2_1280.onnx",  # where to save the model
                  # export_params=True,  # store the trained parameter weights inside the model file
                  opset_version=10,          # the ONNX version to export the model to
                  input_names=['input'],     # the model's input names
                  output_names=['output'],   # the model's output names
                  dynamic_axes={'input': {0: 'batch_size'},    # variable length axes
                                'output': {0: 'batch_size'}})
ort_sess = ort.InferenceSession('mobilev2_1280.onnx', providers=['CPUExecutionProvider'])
outputs = ort_sess.run(None, {'input': tensorimg.numpy()})
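To compare two images, a similarity still has to be computed between their feature vectors; a common choice is cosine similarity. A minimal sketch, assuming the exported ONNX session above and placeholder file names:

def mobilev2_siml(path1, path2):
    # Run both images through the exported ONNX model and keep the 1280-d features
    feats = []
    for p in (path1, path2):
        x = trans(Image.open(p).convert('RGB'))[None, :]
        feats.append(ort_sess.run(None, {'input': x.numpy()})[0][0])
    f1, f2 = feats
    # Cosine similarity between the two feature vectors
    return float(np.dot(f1, f2) / (np.linalg.norm(f1) * np.linalg.norm(f2)))

print('mobilev2:', mobilev2_siml('img1.jpg', 'img2.jpg'))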
Similarity scores for six test image pairs:

| metric | pair 1 | pair 2 | pair 3 | pair 4 | pair 5 | pair 6 |
|---|---|---|---|---|---|---|
| mobilev2 | 0.97900390625 | 0.9423828125 | 0.56396484375 | 0.7568359375 | 0.80126953125 | 0.49462890625 |
| ahash | 0.86 | 0.76 | 0.44 | 0.55 | 0.47 | 0.45 |
| dhash | 0.67 | 0.59 | 0.56 | 0.59 | 0.53 | 0.44 |
| phash | 0.92 | 0.87 | 0.85 | 0.62 | 0.68 | 0.84 |
| rgbhist | 0.859699010848999 | 0.7028341293334961 | 0.32259905338287354 | 0.1238623782992363 | 0.18820995092391968 | 0.11006951332092285 |
| grayhist | 0.818916916847229 | 0.6859147548675537 | 0.3330121636390686 | 0.1564500629901886 | 0.2547611594200134 | 0.13269555568695068 |
| ssim | 0.6861183398780377 | 0.4142923740661437 | 0.05655745965670097 | 0.03365421905316316 | 0.0 | 0.027815015868851507 |
| orb | - | - | 0.0 | - | - | 0.07303370786516854 |