目录
一、skimage(scikit-image)图片相似度 SSIM
二、余弦距离
三、汉明距离(效率高,计算速度快)
过滤相似图片,小图片
安装:
pip install scikit-image
from skimage.metrics import structural_similarity as ssim
import cv2
import numpy as np
# Load both images; cv2.imread returns None when a file cannot be read.
img1 = cv2.imread('1.jpg')
img2 = cv2.imread('2.jpg')
if img1 is None or img2 is None:
    raise FileNotFoundError("could not read '1.jpg' and/or '2.jpg'")
# SSIM needs equally sized inputs.  np.resize only repeats/truncates the
# flattened pixel buffer (scrambling the picture), so rescale with cv2.resize
# instead; its size argument is (width, height).
img2 = cv2.resize(img2, (img1.shape[1], img1.shape[0]))
print(img2.shape)
print(img1.shape)
# Store the result under its own name so the imported ssim() stays callable.
# NOTE: scikit-image >= 0.19 spells multichannel=True as channel_axis=-1.
score = ssim(img1, img2, multichannel=True)
print(score)
有一次报错了:
"win_size exceeds image extent. If the input is a multichannel "
ValueError: win_size exceeds image extent. If the input is a multichannel (color) image, set multichannel=True.
调试发现:计算相似度时,输入图片的宽和高都必须大于 6(SSIM 默认窗口 win_size=7,图片每边至少要有 7 个像素)。
随机数图片测试:
import numpy as np
import cv2
from skimage.metrics import structural_similarity as ssima
def main():
    """Draw random filled circles on a white canvas and print one SSIM score.

    After drawing, two 7x7 patches are cut from the top-left region of the
    canvas and their structural similarity is printed; 7 pixels per side is
    the smallest patch the default SSIM window accepts.  The canvas is then
    shown in a window until a key is pressed.
    """
    # 1. Create a white background image.
    d = 400
    img = np.ones((d, d, 3), np.uint8) * 255
    # 2. Draw 100 filled circles at random positions.
    for _ in range(100):
        # Random centre point.
        center_x = np.random.randint(0, high=d)
        center_y = np.random.randint(0, high=d)
        # Random radius and colour; d // 5 keeps the upper bound an integer.
        radius = np.random.randint(5, high=d // 5)
        color = np.random.randint(0, high=256, size=(3,)).tolist()
        cv2.circle(img, (center_x, center_y), radius, color, -1)
    # Compare the first 7x7 patch with its diagonal neighbour.
    h_h = 7
    img1 = img[:h_h, :h_h]
    img2 = img[h_h:h_h * 2, h_h:h_h * 2]
    # NOTE: scikit-image >= 0.19 spells multichannel=True as channel_axis=-1.
    ssim = ssima(img1, img2, multichannel=True)
    print(ssim)
    # 3. Show the result.
    cv2.imshow("img", img)
    cv2.waitKey()
    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()
余弦相似度用向量空间中两个向量夹角的余弦值作为衡量两个个体间差异的大小。两个向量越相似夹角越小,余弦值越接近1。相比距离度量,余弦相似度更加注重两个向量在方向上的差异,而非距离或长度上。
import numpy as np
from scipy.spatial.distance import pdist
# Two random 10-dimensional vectors to compare.
x = np.random.random(10)
y = np.random.random(10)

# Method 1: one minus the cosine of the angle, straight from the definition.
norm_product = np.linalg.norm(x) * np.linalg.norm(y)
dist1 = 1 - np.dot(x, y) / norm_product

# Method 2: SciPy's pairwise distance over the two vectors stacked as rows.
dist2 = pdist(np.vstack([x, y]), 'cosine')

# Print the inputs followed by both results.
for label, value in (('x', x), ('y', y), ('dist1', dist1), ('dist2', dist2)):
    print(label, value)
汉明距离表示两个(相同长度)字对应位不同的数量,我们以d(x,y)表示两个字x,y之间的汉明距离。对两个字符串进行异或运算,并统计结果为1的个数,那么这个数就是汉明距离。
向量相似度越高,对应的汉明距离越小。如10001001和10010001有2位不同。
#比较两张图片的相似度
from PIL import Image
from functools import reduce
import time
# Compute the image hash.
def phash(img):
    """Return a 64-bit hash of a PIL image as a non-negative int.

    The image is shrunk to 8x8 and converted to greyscale.  Bit ``i`` of the
    result is 1 when pixel ``i`` (in ``getdata()`` order) is at least the mean
    grey level, otherwise 0.  Despite the name this is an average hash, not a
    DCT-based perceptual hash.
    """
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    small = img.resize((8, 8), Image.LANCZOS).convert('L')
    # Read the 64 pixels once and reuse them for both the mean and the bits.
    pixels = list(small.getdata())
    avg = sum(pixels) / 64.
    return sum(1 << i for i, p in enumerate(pixels) if p >= avg)
# Compute the Hamming distance.
def hamming_distance(a, b):
    """Return how many bit positions differ between the integers a and b."""
    differing_bits = format(a ^ b, 'b')
    return differing_bits.count('1')
# Decide whether two images are similar.
def is_imgs_similar(img1, img2, max_distance=5):
    """Return True when the hashes of two PIL images are close enough.

    Args:
        img1: First image, passed to phash().
        img2: Second image, passed to phash().
        max_distance: Largest Hamming distance between the two hashes that
            still counts as similar.  Defaults to 5, the original fixed
            threshold.

    Returns:
        bool: True if the hashes differ in at most ``max_distance`` bits.
    """
    return hamming_distance(phash(img1), phash(img2)) <= max_distance
if __name__ == '__main__':
    # Demo: compare two images on disk and time the comparison.
    img1_path = 'F:\\project_py\\my_study\\Image\\003.jpg'
    img2_path = "F:\\project_py\\my_study\\Image\\006.jpg"
    # Open both files in a with-block so their handles are closed afterwards.
    with Image.open(img1_path) as img1, Image.open(img2_path) as img2:
        start_time = time.time()
        a = is_imgs_similar(img1, img2)
        end_time = time.time()
    # Prints the verdict followed by the elapsed seconds.
    print(a, end_time - start_time)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/1/15 9:19
# @Author : xiaodai
import datetime
import os
import shutil

import cv2
try:
    # compare_ssim moved to skimage.metrics.structural_similarity in
    # scikit-image 0.16 and was removed from skimage.measure in 0.18.
    from skimage.metrics import structural_similarity as compare_ssim
except ImportError:
    from skimage.measure import compare_ssim
def yidong(filename1, filename2):
    """Move the file at filename1 to filename2 using shutil.move."""
    source, destination = filename1, filename2
    shutil.move(source, destination)
def delete(filename1):
    """Remove the file at filename1 from disk."""
    target = filename1
    os.remove(target)
def get_time(now):
    """Print the time elapsed since the datetime `now`, prefixed by 'real_time:'."""
    elapsed = datetime.datetime.now() - now
    print('real_time:', elapsed)
def _list_jpgs(folder):
    """Return the path of every .jpg file under folder, in os.walk order."""
    return [os.path.join(rootdir, name)
            for rootdir, _, names in os.walk(folder)
            for name in names if name.endswith('.jpg')]


def _load_thumbnail(path):
    """Read an image and shrink it to 46x46; return None if it cannot be decoded."""
    img = cv2.imread(path)
    if img is None:
        return None
    return cv2.resize(img, (46, 46), interpolation=cv2.INTER_CUBIC)


def _dedupe_folder(img_path):
    """Delete tiny files and near-duplicate neighbours among the .jpg files under img_path.

    Files smaller than 512 bytes are deleted outright.  Every remaining image
    is compared with the image listed just before it; when their SSIM exceeds
    0.9 the later one is deleted once the whole folder has been scanned.
    """
    # Drop tiny files first so they never take part in a comparison.
    candidates = []
    for filename in _list_jpgs(img_path):
        if os.path.getsize(filename) < 512:
            delete(filename)
        else:
            candidates.append(filename)

    imgs_n = []
    previous_name, previous = None, None
    for filename in candidates:
        current = _load_thumbnail(filename)
        if current is None:
            # Skip undecodable files instead of crashing inside cv2.resize.
            print('unreadable', filename)
            continue
        if previous is not None:
            # NOTE: scikit-image >= 0.19 spells multichannel=True as channel_axis=-1.
            ssim = compare_ssim(previous, current, multichannel=True)
            if ssim > 0.9:
                imgs_n.append(filename)
                print(previous_name, filename, ssim)
            else:
                print('small_ssim', previous_name, filename, ssim)
        # Keep the decoded thumbnail so each image is read only once.
        previous_name, previous = filename, current

    # Delete only after scanning, so comparisons always see the files on disk.
    # To archive duplicates instead, call yidong(image, <target dir>) here.
    for image in imgs_n:
        delete(image)
        print('delete', image)


if __name__ == '__main__':
    now = datetime.datetime.now()
    or_path = r'G:\_test_xinjiang\0625'
    # Walk the tree itself.  Iterating over or_path would loop over the
    # characters of the path string and never reach a real directory.
    for root, dirs, _ in os.walk(or_path):
        for dirc in dirs:
            if dirc == 'JPEGImages':
                _dedupe_folder(os.path.join(root, dirc))
    get_time(now)