python删除文件夹中重复的图片

如题。我觉得下面的实现并不理想,不知道大家有没有更好的办法。下面给出两种方法,第一种是基于图像哈希(aHash、dHash、pHash)的比较:

import cv2
import numpy as np

# Average hash (aHash)
def aHash(img):
    """Return the 64-bit average hash of an image as a string of '0'/'1'.

    The image is shrunk to 8x8, converted to grayscale, and every pixel is
    compared with the mean gray level: brighter than the mean gives '1',
    otherwise '0'. Bits are emitted row by row.

    Args:
        img: image array accepted by ``cv2.resize`` and
            ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)`` (presumably a BGR
            image as returned by ``cv2.imread``).

    Returns:
        A 64-character string consisting of '0' and '1'.
    """
    # Shrink to an 8x8 thumbnail, then drop the colour information.
    small = cv2.resize(img, (8, 8))
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
    # Let NumPy compute the mean in floating point. Summing the pixels one by
    # one starting from the Python int 0 keeps the accumulator in the pixel
    # dtype under NumPy 2 promotion rules, so a uint8 image would wrap around
    # at 255 and produce a wrong average.
    avg = gray.mean()
    # flatten() walks the pixels in row-major order, i.e. row by row.
    return ''.join('1' if pixel > avg else '0' for pixel in gray.flatten())
# Difference hash (dHash)
def dHash(img):
    """Return the 64-bit difference hash of an image as a string of '0'/'1'.

    The image is resized to 9 columns by 8 rows and converted to grayscale.
    Each pixel in the first 8 columns is compared with its right-hand
    neighbour: strictly brighter gives '1', otherwise '0'. Bits are emitted
    row by row.
    """
    # 9 columns give 8 horizontal neighbour comparisons per row.
    resized = cv2.resize(img, (9, 8))
    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    # Left pixel vs. right pixel for all rows at once.
    brighter_than_right = gray[:, :-1] > gray[:, 1:]
    return ''.join('1' if bit else '0' for bit in brighter_than_right.flatten())
# Perceptual hash (pHash)
def pHash(img):
    """Return the 64-bit perceptual hash of an image as a list of 0/1 ints.

    The image is resized to 32x32, converted to grayscale and transformed
    with a DCT. The top-left 8x8 block of coefficients is compared with its
    own mean: a coefficient above the mean gives 1, otherwise 0. Bits are
    emitted row by row.
    """
    resized = cv2.resize(img, (32, 32))
    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    # cv2.dct is fed floating-point data, hence the float32 conversion.
    coefficients = cv2.dct(np.float32(gray))
    # Keep only the top-left 8x8 corner of the DCT result.
    low_freq = coefficients[0:8, 0:8]
    mean_value = np.mean(low_freq)
    return [1 if value > mean_value else 0 for value in low_freq.flatten()]
# Hash comparison
def cmpHash(hash1, hash2):
    """Count the positions at which two hashes differ.

    Args:
        hash1: first hash, a sequence such as a '0'/'1' string or a list.
        hash2: second hash, compared element by element with ``hash1``.

    Returns:
        The number of differing positions (0 means identical), or -1 when
        the two hashes have different lengths, which signals a bad argument.
    """
    # Hashes of different length cannot be compared position by position.
    if len(hash1) != len(hash2):
        return -1
    return sum(1 for left, right in zip(hash1, hash2) if left != right)
import os
if __name__ == '__main__':
    # Report pairs of near-duplicate images inside `directory_name`, judged
    # by the perceptual hash (pHash).
    directory_name = 'part'
    # Two images count as duplicates when at most this many pHash bits differ.
    max_distance = 5

    # Hash every readable image exactly once instead of re-reading and
    # re-hashing both files for every pair.
    hashes = []
    for entry in os.listdir(directory_name):
        # os.path.join inserts the path separator; plain string concatenation
        # produced paths such as 'partfoo.jpg' that cannot be opened.
        file_path = os.path.join(directory_name, entry)
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread returns None for anything it cannot decode
            # (sub-directories, non-image files); skip those.
            continue
        hashes.append((file_path, pHash(image)))

    # Compare each unordered pair once, so a pair is reported a single time
    # and an image is never compared with itself.
    for index, (name, hash1) in enumerate(hashes):
        for name2, hash2 in hashes[index + 1:]:
            n = cmpHash(hash1, hash2)
            # cmpHash returns -1 on a length mismatch; never treat that as
            # a match.
            if 0 <= n <= max_distance:
                print(name)
                print(name2)
                # Deletion is intentionally left disabled; enable it only
                # after reviewing the printed pairs:
                # os.remove(name2)

 

另一种方法是使用 SSIM(结构相似性)比较相邻的两张图片:

import os
import cv2
import shutil
from skimage.metrics import structural_similarity
def delete(filename1):
    """Delete the file at path ``filename1``."""
    # os.unlink is the same operation as os.remove under its other name.
    os.unlink(filename1)
def yidong(filename1, filename2):
    """Move the file ``filename1`` to the destination ``filename2``.

    Nothing is returned; the result of ``shutil.move`` is discarded.
    """
    shutil.move(src=filename1, dst=filename2)
if __name__ == '__main__':
    # Walk `path`, compare every .jpg with the next one in the listing using
    # SSIM, and collect the second file of each similar pair.
    path = r'202102/'

    # Files judged to be near-duplicates of the file listed just before them.
    imgs_n = []
    # NOTE: os.walk yields names in whatever order the filesystem returns
    # them, so "adjacent" below follows that order.
    img_files = [os.path.join(rootdir, file)
                 for rootdir, _, files in os.walk(path)
                 for file in files if file.endswith('.jpg')]
    # Pair each file with its successor. zip stops before the last file, so a
    # listing with a single image no longer indexes past the end of the list.
    for current, following in zip(img_files, img_files[1:]):
        if not os.path.exists(current):
            print('not exist', current)
            break
        img = cv2.imread(current)
        img1 = cv2.imread(following)
        if img is None or img1 is None:
            # cv2.imread returns None for files it cannot decode; skip the
            # pair instead of crashing inside cv2.resize.
            continue
        # Both images are brought to the same small size before comparison.
        img = cv2.resize(img, (46, 46), interpolation=cv2.INTER_CUBIC)
        img1 = cv2.resize(img1, (46, 46), interpolation=cv2.INTER_CUBIC)
        # `channel_axis` replaces the deprecated `multichannel=True` flag
        # (scikit-image >= 0.19); the colour channels are on the last axis.
        ssim = structural_similarity(img, img1, channel_axis=-1)
        if ssim > 0.3:
            imgs_n.append(following)
            print(current, following, ssim)

你可能感兴趣的:(python,计算机视觉,python)