Python 遍历文件夹内的文件并检索文件中的中文内容

前言

有个需求:遍历文件夹内的文件,并搜索文件内容中是否存在特定的中文关键字。

代码

import os 
import re
from os import path 

def cn_to_unicode(in_str, need_str=True, debug=False):
    r"""Convert every character of ``in_str`` into a backslash escape sequence.

    Characters whose code point is at most 0xff are written by hand as
    ``\u00xx``; every other character is encoded with the ``unicode_escape``
    codec.

    Args:
        in_str: Text to convert; iterated character by character.
        need_str: When true (default) return a single ``str``; otherwise
            return the list of per-character ``bytes`` objects.
        debug: When true, print the intermediate list and its length (plus
            the lengths of the first and last items when there are any).

    Returns:
        The concatenated escape sequences as ``str`` when ``need_str`` is
        true, else a ``list`` of ``bytes`` with one entry per character.
        Empty input yields ``''`` or ``[]`` respectively.
    """
    out = []

    for ch in in_str:
        # Numeric code point of the character.
        val = ord(ch)

        if val <= 0xff:
            # Code points up to 0xff get a hand-built \u00xx escape.
            # Encode as utf-8 here: using unicode_escape for this step would
            # escape the backslash a second time.
            res = bytes('\\u' + format(val, '04x'), encoding='utf-8')
        else:
            res = ch.encode("unicode_escape")

        out.append(res)

    if debug:
        print(out)
        if out:
            print(len(out), len(out[0]), len(out[-1]))
        else:
            # Empty input has no first/last item to measure.
            print(len(out))

    if need_str:
        return ''.join(str(piece, encoding='utf-8') for piece in out)
    return out

def scaner_file(url, key):
    """Recursively search the files under ``url`` for the pattern ``key``.

    Each regular file is read as UTF-8 text and searched with ``re``. On the
    first file that matches, its contents are printed and the process is
    terminated via ``SystemExit('Success!')``. Subdirectories are scanned
    recursively with the same ``key``. After a directory has been scanned
    without a match, the path of its last entry is printed.

    Args:
        url: Path of the directory to scan.
        key: Regular-expression pattern to look for in each file.

    Raises:
        SystemExit: With the message ``'Success!'`` when a file matches.
        UnicodeDecodeError: If a file is not valid UTF-8 (not handled here).
        OSError: If the directory or a file cannot be read.
    """
    last_path = None
    for name in os.listdir(url):
        real_url = path.join(url, name)
        last_path = real_url
        if path.isfile(real_url):
            with open(path.abspath(real_url), encoding='utf8') as file_obj:
                contents = file_obj.read()
            if re.search(key, contents):
                print(contents)
                # Same effect as exit('Success!') without relying on the
                # site-provided exit() helper.
                raise SystemExit('Success!')
        elif path.isdir(real_url):
            # The key must be forwarded, otherwise the recursive call fails.
            scaner_file(real_url, key)
        else:
            print("其他情况")
    # An empty directory has no entry to report.
    if last_path is not None:
        print(last_path)

# Directory to scan and the Chinese keyword to look for.
check_dir = "./result"
chinese_key = "你好"
# The scanner receives the keyword in its escaped form.
unicode_key = cn_to_unicode(chinese_key)

scaner_file(check_dir, unicode_key)

你可能感兴趣的:(【每日学习】,python)