python基础之寻找相同的文件

整体思路:
首先是循环遍历文件夹下的文件;
然后建立一个字典:key 为文件内容的哈希值,value 为文件名;
每遇到一个文件,先计算其哈希值,再检查该哈希值是否已在字典中:如果存在就输出文件名,否则将其加入字典。

# -*- coding: utf-8 -*-
import os
import fnmatch
import hashlib
def is_file_match(filename, patterns):
    """Return True if filename matches at least one pattern in patterns.

    Each pattern is tested with fnmatch.fnmatch, so shell-style wildcards
    such as '*.py' are understood. An empty patterns iterable never matches.
    """
    return any(fnmatch.fnmatch(filename, candidate) for candidate in patterns)
def find_specific_files(root, patterns=('*.py',), exclude_dirs=()):
    """Walk root top-down and yield the paths of files matching patterns.

    Args:
        root: Directory at which the walk starts.
        patterns: Iterable of shell-style wildcard patterns, tested against
            each bare file name (not the joined path) via is_file_match.
        exclude_dirs: Iterable of directory names that are removed from the
            walk wherever they appear, so their contents are never visited.

    Yields:
        os.path.join(directory, filename) for every matching file.
    """
    # The defaults are tuples rather than lists so that no mutable object is
    # shared between calls; they are only iterated, so behaviour is the same.
    for dirpath, dirnames, filenames in os.walk(root):
        # os.walk (top-down) re-reads dirnames after this iteration, so
        # removing entries in place stops it from descending into them.
        for excluded in exclude_dirs:
            if excluded in dirnames:
                dirnames.remove(excluded)

        for filename in filenames:
            if is_file_match(filename, patterns):
                yield os.path.join(dirpath, filename)
# Maps a content checksum to the path of the first file seen with it.
a = dict()
# Root directory to scan, read interactively from the user.
d = input('输入文件地址:')
def get_chunk(filename):
    """Yield the lines of a UTF-8 text file one at a time.

    The file is opened in text mode with encoding 'UTF-8'; each yielded item
    is one line as returned by the file object, including its line ending
    when present. The file is closed once the generator is exhausted.
    """
    with open(filename, encoding='UTF-8') as handle:
        yield from handle
def get_file_checksum(filename, chunk_size=65536):
    """Return the MD5 hex digest of the raw bytes stored in filename.

    The file is read in binary mode so the digest reflects the exact bytes on
    disk. Hashing decoded text instead would raise UnicodeDecodeError on
    files that are not valid UTF-8, and newline translation would give files
    that differ only in line endings the same digest.

    Args:
        filename: Path of the file to hash.
        chunk_size: Number of bytes read per call, so large files are never
            loaded into memory at once.

    Returns:
        The hexadecimal MD5 digest as a str.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    h = hashlib.md5()
    with open(filename, 'rb') as f:
        # iter() with a sentinel keeps reading until read() returns b'' (EOF).
        for block in iter(lambda: f.read(chunk_size), b''):
            h.update(block)
    return h.hexdigest()

# Hash every matching file under d. The first path seen for each checksum is
# remembered in a; any later file with the same checksum is reported.
for item in find_specific_files(d):
    try:
        key = get_file_checksum(item)
    except (OSError, UnicodeError) as exc:
        # One unreadable file (broken symlink, permission problem, decoding
        # failure) should not abort the scan of the remaining files.
        print(item, 'could not be read:', exc)
        continue
    if key in a:
        print(item,'is the same with',a[key])
    else:
        a[key] = item
        

你可能感兴趣的:(python)