imagededup 使用起来非常方便,可以应对一些基本场景。但由于它依赖的库较多,我使用 Anaconda 创建了独立的虚拟环境,以免影响生产或测试环境:
conda create -n img_dup python=3.6
conda activate img_dup
安装
pip install imagededup
简单示例
# Run inside the conda environment "img_dup" created above (imagededup installed)
from imagededup.methods import PHash
import os
import shutil
from scipy.fft import dst
def makepath(path):
    """Create the directory ``path``, including missing parents, if needed.

    Calling this on a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which
    # could race with another process creating the directory in between.
    os.makedirs(path, exist_ok=True)
# Copy one representative of every group of near-duplicate images from
# ori_path into dst_path, using perceptual hashing (PHash) from imagededup.
phasher = PHash()
ori_path = './images'
dst_path = './images2'
makepath(dst_path)

# Generate the binary hash encoding of every image in the source directory.
encodings = phasher.encode_images(image_dir=ori_path)

# Look up duplicates among the encoded images; the distance threshold
# controls how aggressively images are treated as duplicates.
duplicates = phasher.find_duplicates(encoding_map=encodings, max_distance_threshold=3)

# Optional: given one image, display the images detected as its duplicates.
# from imagededup.utils import plot_duplicates
# plot_duplicates(image_dir='path/to/image/directory',
#                 duplicate_map=duplicates,
#                 filename='ukbench00120.jpg')

# Filenames already reported as a duplicate of an image that was copied.
# Kept as a set so the membership test in the loop is O(1) instead of
# scanning an ever-growing list.
duplicate_list = set()
for k, v in duplicates.items():
    if k not in duplicate_list:
        # First time this image (or anything it duplicates) is seen: keep
        # it, and mark everything listed as its duplicate to be skipped.
        shutil.copy(os.path.join(ori_path, k), os.path.join(dst_path, k))
        duplicate_list.update(v)
print('finish!!')
开源地址:
https://github.com/idealo/imagededup
API教程:
https://idealo.github.io/imagededup/