递归查找指定目录中含有指定关键字的文件

Windows不支持搜索.java、.c、.py、.tcl等文件的内容。为此，我实现了函数find_files_by_suffix_and_content()，用于递归查找指定目录中内容包含指定关键字的文件；可以按文件后缀过滤文件，也可以指定读取文件时使用的编码。

#python3.6
#Windows不支持搜索.java、.c、.py、.tcl文件的内容。这里我实现一个函数find_files_by_suffix_and_content()递归查找指定目录中含有指定关键字的文件。可以根据文件后缀过滤文件。
import os

#查找指定目录中含有指定关键字的文件。可以根据文件后缀过滤文件,可以指定文件的编码格式。
def find_files_by_suffix_and_content(directory: str, key_words: str, suffix: str = ".py", encoding: str = 'utf-8'):
    """Find files under ``directory`` that end with ``suffix`` and contain ``key_words``.

    This is a two-step pipeline: ``find_files_by_suffix`` collects the
    candidate paths, then ``find_files_by_content`` scans them.

    Args:
        directory: Root directory handed to ``find_files_by_suffix``.
        key_words: Text to look for inside each candidate file.
        suffix: File-name ending used to pick candidate files.
        encoding: Encoding forwarded to ``find_files_by_content``.

    Returns:
        Whatever ``find_files_by_content`` returns for the candidate files.
    """
    return find_files_by_content(
        find_files_by_suffix(directory, suffix),
        key_words,
        encoding=encoding,
    )

def find_files_by_suffix(directory: str, suffix: str = ".py"):
    """Return the full paths of all files under ``directory`` ending with ``suffix``.

    The directory tree is traversed recursively with ``os.walk``; every file
    name that passes ``str.endswith(suffix)`` is joined with the path of the
    directory that contains it.

    Args:
        directory: Root directory to walk.
        suffix: File-name ending to match.

    Returns:
        A list of joined paths, in the order ``os.walk`` yields them.
    """
    matched = []
    for root, _subdirs, names in os.walk(directory):
        matched.extend(
            os.path.join(root, name) for name in names if name.endswith(suffix)
        )
    return matched

def find_files_by_content(files: list, key_words: str, encoding: str = 'utf-8'):
    """Search the text of each file for ``key_words``.

    The scan is best-effort: a file that cannot be opened, read, or decoded
    with ``encoding`` is skipped instead of aborting the whole search.

    Args:
        files: Paths of the files to scan.
        key_words: Literal substring to look for (counted with ``str.count``,
            i.e. non-overlapping occurrences).
        encoding: Text encoding used to open every file.

    Returns:
        A tuple ``(match_files, match_times)``: the paths that contain at
        least one occurrence, in input order, and the total number of
        occurrences across those files. An empty ``key_words`` yields
        ``([], 0)``.
    """
    match_files = []
    match_times = 0
    if not key_words:
        # str.count("") returns len(text) + 1, which would report every file
        # as a match with a meaningless count.
        return match_files, match_times
    for path in files:
        try:
            with open(path, 'r', encoding=encoding) as f:
                content = f.read()
        except (OSError, UnicodeError):
            # Unreadable or undecodable file: skip it and keep scanning.
            continue
        occurrences = content.count(key_words)
        if occurrences > 0:
            match_files.append(path)
            match_times += occurrences
    return match_files, match_times

if __name__ == "__main__":
    # Demo run: search the .py files under a hard-coded directory for the
    # text "ModelForm", then print the summary followed by one path per line.
    search_root = r"D:\Python36\Lib\site-packages\oscar"
    hits, total_occurrences = find_files_by_suffix_and_content(
        search_root, "ModelForm", suffix=".py", encoding='utf-8'
    )
    print("match file number:", len(hits))
    print("key_words occurs:", total_occurrences)
    for hit in hits:
        print(hit)

输出为:

match file number: 20
key_words occurs: 46
D:\Python36\Lib\site-packages\oscar\apps\address\forms.py
D:\Python36\Lib\site-packages\oscar\apps\basket\forms.py
D:\Python36\Lib\site-packages\oscar\apps\basket\formsets.py
D:\Python36\Lib\site-packages\oscar\apps\basket\views.py
D:\Python36\Lib\site-packages\oscar\apps\catalogue\reviews\forms.py
D:\Python36\Lib\site-packages\oscar\apps\customer\forms.py
D:\Python36\Lib\site-packages\oscar\apps\dashboard\catalogue\forms.py
D:\Python36\Lib\site-packages\oscar\apps\dashboard\communications\forms.py
D:\Python36\Lib\site-packages\oscar\apps\dashboard\offers\forms.py
D:\Python36\Lib\site-packages\oscar\apps\dashboard\orders\forms.py
D:\Python36\Lib\site-packages\oscar\apps\dashboard\pages\forms.py
D:\Python36\Lib\site-packages\oscar\apps\dashboard\partners\forms.py
D:\Python36\Lib\site-packages\oscar\apps\dashboard\ranges\forms.py
D:\Python36\Lib\site-packages\oscar\apps\dashboard\reviews\forms.py
D:\Python36\Lib\site-packages\oscar\apps\dashboard\shipping\forms.py
D:\Python36\Lib\site-packages\oscar\apps\dashboard\users\forms.py
D:\Python36\Lib\site-packages\oscar\apps\dashboard\vouchers\forms.py
D:\Python36\Lib\site-packages\oscar\apps\payment\forms.py
D:\Python36\Lib\site-packages\oscar\apps\wishlists\forms.py
D:\Python36\Lib\site-packages\oscar\forms\mixins.py

 

你可能感兴趣的:(Python,python)