# 需求:遍历文件夹(含子文件夹)内的文件,并搜索文件内容中是否包含特定的中文关键字。
import os
import re
from os import path
def cn_to_unicode(in_str, need_str=True, debug=False):
    r"""Escape every character of ``in_str`` as a backslash-u style sequence.

    Characters with a code point up to 0xff are rendered as ``\u00xx``
    (built by hand); all other characters are rendered with the
    ``unicode_escape`` codec, which yields ``\uXXXX`` (or ``\UXXXXXXXX``
    for code points above 0xffff).

    Args:
        in_str: The text to escape.
        need_str: When true (default) return one concatenated ``str``;
            otherwise return the list of per-character ``bytes`` pieces.
        debug: When true, print the intermediate list and, if it is not
            empty, its length plus the lengths of its first and last items.

    Returns:
        The escaped text as ``str``, or a ``list`` of ``bytes`` when
        ``need_str`` is false.
    """
    out = []
    for ch in in_str:
        code_point = ord(ch)
        if code_point <= 0xff:
            # Build \u00xx by hand. Running the already-escaped text through
            # unicode_escape would double the leading backslash.
            piece = '\\u{:04x}'.format(code_point).encode('utf-8')
        else:
            piece = ch.encode("unicode_escape")
        out.append(piece)

    if debug:
        print(out)
        if out:
            print(len(out), len(out[0]), len(out[-1]))
        else:
            # Empty input: there is no first/last element to measure.
            print(len(out))

    if need_str:
        # Every piece is pure ASCII, so one join plus one decode is enough.
        return b''.join(out).decode('utf-8')
    return out
def scaner_file(url, key):
    """Recursively search the files under ``url`` for the regex ``key``.

    Each regular file is read as UTF-8 text. On the first file whose
    contents match ``key`` the contents are printed and the process is
    terminated via ``SystemExit('Success!')``. Subdirectories are searched
    with the same ``key``. The path of every processed entry is printed.

    Args:
        url: Directory to scan.
        key: Regular-expression pattern (``str``) to look for.

    Raises:
        SystemExit: With the message ``'Success!'`` when a match is found.
    """
    for entry in os.listdir(url):
        real_url = path.join(url, entry)
        if path.isfile(real_url):
            # NOTE(review): a file that is not valid UTF-8 raises
            # UnicodeDecodeError here and aborts the whole scan.
            with open(path.abspath(real_url), encoding='utf8') as file_obj:
                contents = file_obj.read()
            # Only the existence of a match matters, so search() suffices.
            if re.search(key, contents):
                print(contents)
                raise SystemExit('Success!')
        elif path.isdir(real_url):
            # Pass the key along; without it the recursive call fails.
            scaner_file(real_url, key)
        else:
            # Neither a regular file nor a directory.
            print("其他情况")
        print(real_url)
# Directory whose files are scanned, and the keyword to look for.
check_dir = "./result"
chinese_key = "你好"
# The keyword is converted to its escaped form before being used as the
# search pattern.
unicode_key = cn_to_unicode(chinese_key)
scaner_file(url=check_dir, key=unicode_key)