1. 遍历所有文档
import psutil
import xlrd
import os
import itertools
import re
import time
import win32com.client
from typing import Generator, Any
from itertools import product
def fp_generator():
    """Yield the full path of every file found on every disk partition.

    The partitions reported by ``psutil.disk_partitions()`` are visited in
    reverse order, and the first field of each entry (the disk name) is used
    as the root handed to ``os.walk``.
    """
    partitions = psutil.disk_partitions()
    for partition in reversed(partitions):
        disk_root = partition[0]
        for folder, _subdirs, filenames in os.walk(disk_root):
            for filename in filenames:
                yield os.path.join(folder, filename)
2. 获取Word与Excel文件的内容
(经过多次试验,若频繁地 Quit 掉 win32com.client.Dispatch("Word.Application"),再立刻创建并访问 Word 文件,会报错,原因可能是 Windows 系统还未真正关闭上一个对象,造成句柄冲突。因此一定要 time.sleep,至少 0.1 秒。
但是 Excel 完全没有这个问题,这就是让人头大的 Windows API。)
def excel_values(path) -> Generator[str, Any, Any]:
    """Yield the text and numeric cell contents of an Excel workbook as strings.

    Cells are visited sheet by sheet, column by column. Text cells are yielded
    unchanged; numeric cells are truncated to an integer and yielded as a
    string. All other cell types are skipped.

    Any error raised while opening or reading the workbook (per the original
    note: an encrypted or corrupted file) silently ends the generator, so one
    unreadable file does not abort a whole scan.
    """
    try:
        with xlrd.open_workbook(path) as wb:
            for sheet in wb.sheets():
                for x, y in itertools.product(range(sheet.ncols), range(sheet.nrows)):
                    cell = sheet.cell(y, x)
                    if cell.ctype == xlrd.XL_CELL_TEXT:
                        yield cell.value
                    elif cell.ctype == xlrd.XL_CELL_NUMBER:
                        yield str(int(cell.value))
    except Exception:
        # Best effort: skip unreadable workbooks. Only ``Exception`` is caught
        # so that Ctrl+C, SystemExit and generator close() still propagate.
        return
def word_values(path) -> Generator[str, Any, Any]:
    """Yield the text of every paragraph of a Word document.

    Per the original note, iterating ``Paragraphs`` also returns the text
    held in tables. If the document cannot be opened, or reading fails part
    way through, the generator simply ends.

    The document is closed and Word is quit in a ``finally`` block, so the
    cleanup also runs when the consumer stops iterating early or the open
    fails. A 0.5 s pause follows every ``Quit``; do not remove it, because
    creating a new Word instance immediately after quitting one was observed
    to fail.
    """
    word_app = None
    doc = None
    try:
        try:
            word_app = win32com.client.Dispatch("Word.Application")
            word_app.Visible = 0
            word_app.DisplayAlerts = 0
            # NOTE(review): the fixed password looks like a dummy value so that
            # password-protected documents fail to open instead of prompting -
            # confirm against the Word Documents.Open documentation.
            doc = word_app.Documents.Open(path, False, False, False, '8888aGa8332c9')
        except Exception:
            return  # could not open the document: nothing to yield
        try:
            for para in doc.Paragraphs:
                yield para.Range.text
        except Exception:
            return
    finally:
        # Best-effort cleanup: a failure to close must not mask the outcome.
        if doc is not None:
            try:
                doc.Close()
            except Exception:
                pass
        if word_app is not None:
            try:
                word_app.Quit()
            except Exception:
                pass
            time.sleep(0.5)  # required pause after Quit - do not remove
# Dispatch table: extension fragment -> generator function that reads the file.
# get_values() matches the keys as substrings of the file extension, so '.xls'
# also covers '.xlsx' and '.doc' also covers '.docx'.
map_get_values = {
    '.xls': excel_values,
    '.doc': word_values,
}
def get_values(path) -> Generator[str, Any, Any]:
    """Return a generator over the text values stored in the file at *path*.

    The file extension is compared case-insensitively against the keys of
    ``map_get_values`` using a substring test, so ``.xls`` also matches
    ``.xlsx`` and ``.XLS``. The first matching reader is used. Files with no
    matching reader yield nothing.
    """
    extension = os.path.splitext(path)[1].lower()
    for file_type, func in map_get_values.items():
        if file_type in extension:
            print(f'正在查看:\t{path}')
            return func(path)
    # Unsupported file type: hand back an empty generator.
    return (x for x in range(0))
3. 判断是否含有客户身份私密信息(身份证号、手机号…)
# Mobile number: '1', then an allowed two-digit prefix (3x, 45/47, 5x except
# 53/54, 66, 70/71/73/76/77/78, 8x, 98/99), then eight more digits.
_PHONE_RE = re.compile(
    r'1((3[\d])|(4[75])|(5[0-25-9])|(66)|(7[013678])|(8[\d])|(9[89]))\d{8}')
# 18-character ID number: six-digit region code not starting with 0, birth year
# starting with 18/19/20, month 01-12, day 01-31, three digits, then a final
# digit or x/X.
_ID_CARD_RE = re.compile(
    r'[1-9]\d{5}(18|19|20)\d{2}((0[1-9])|(1[0-2]))(([0-2][1-9])|[1-3]0|31)\d{3}[0-9xX]')


def is_private(value: str) -> bool:
    """Return True if *value* contains a mobile number or an ID-card number.

    The patterns are searched anywhere in the text and are not anchored, so a
    longer run of digits that contains a matching run also counts. Empty or
    ``None`` input returns False.
    """
    if not value:  # callers may pass None or an empty string
        return False
    return bool(_PHONE_RE.search(value) or _ID_CARD_RE.search(value))
4. Word 与 Excel 加密
# 相关参数:https://docs.microsoft.com/zh-CN/office/vba/api/Excel.Workbooks.Open
def try_several_times(func):
    """Decorator: call *func* up to 100 times until one call succeeds.

    Each failed attempt prints ``something wrong`` and is retried at once.
    The wrapper returns the result of the first successful call, or ``None``
    when all 100 attempts fail; it never re-raises the failure. Only
    ``Exception`` subclasses are retried, so Ctrl+C and SystemExit still
    interrupt the loop.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        for _ in range(100):
            try:
                return func(*args, **kwargs)
            except Exception:
                print('something wrong')
        return None

    return wrapper
def excel_encryption(path, password):
    """Open the Excel workbook at *path* and save it in place with *password*.

    The work is done by an inner function wrapped in ``try_several_times``,
    so a failing attempt is retried. The workbook is closed and Excel is quit
    in ``finally`` blocks so that a failed attempt does not leave an Excel
    instance or an open workbook behind before the next retry.
    """
    @try_several_times
    def encryption(fp, pw):
        excel_app = win32com.client.Dispatch('Excel.Application')
        try:
            excel_app.Visible = 0
            excel_app.DisplayAlerts = 0
            wb = excel_app.Workbooks.Open(fp, False, False, None, Password='')
            try:
                # Third positional argument of SaveAs is the open password.
                wb.SaveAs(fp, None, pw, '')
            finally:
                wb.Close()
        finally:
            excel_app.Quit()

    encryption(path, password)
    time.sleep(0.5)
def word_encryption(path, password):
    """Save the Word document at *path* with *password*, replacing the original.

    Per the original author's note, saving an encrypted .docx over itself
    does not apply the password, while saving under another name does. The
    document is therefore saved to a temporary sibling file that then
    replaces the original.

    The original is only replaced when the encrypted copy actually exists;
    if every attempt failed, the original file is left untouched and a
    message is printed instead.
    """
    # Build a sibling file name that does not exist yet by prefixing the
    # length of the current name until the path is free.
    dirname, tempname = os.path.split(path)
    path_temp = os.path.join(dirname, tempname)
    while os.path.exists(path_temp):
        tempname = f'{len(tempname)}' + tempname
        path_temp = os.path.join(dirname, tempname)

    @try_several_times
    def encryption(fp, pt, pw):
        word_app = win32com.client.Dispatch('Word.Application')
        try:
            word_app.Visible = 0
            word_app.DisplayAlerts = 0
            doc = word_app.Documents.Open(fp, False, False, False, '1')
            try:
                # Fourth positional argument of SaveAs2 is the open password.
                doc.SaveAs2(pt, None, False, pw)
            finally:
                doc.Close()
        finally:
            word_app.Quit()
            # Required pause after Quit - do not remove. Creating a new Word
            # instance right after quitting one was observed to fail.
            time.sleep(0.5)

    encryption(path, path_temp, password)
    if os.path.exists(path_temp):
        # Swap the encrypted copy in for the original in a single step.
        os.replace(path_temp, path)
    else:
        # No encrypted copy was produced: keep the original file.
        print(f'加密失败,已保留原文件:\t{path}')
# Dispatch table: extension fragment -> function that password-protects the
# file. encryption() matches the keys as substrings of the file extension.
map_encrption = {
    '.xls': excel_encryption,
    '.doc': word_encryption,
}
def encryption(fp, pw):
    """Password-protect the file at *fp* with *pw* if its type is supported.

    The file extension is compared case-insensitively against the keys of
    ``map_encrption`` using a substring test, so ``.doc`` also matches
    ``.docx`` and ``.DOC``. The first matching handler is used. Files with no
    matching handler are left untouched.
    """
    extension = os.path.splitext(fp)[1].lower()
    for file_type, func in map_encrption.items():
        if file_type in extension:
            # Announce before starting, as get_values() does for reading.
            print(f'正在加密:\t{fp}')
            func(fp, pw)
            return
5. 输入要设置的密码+运行逻辑
if __name__ == '__main__':
    # Ask for a 6-digit numeric password twice until both entries are valid
    # and identical.
    while True:
        password = input("输入6位数字密码:")
        password1 = input("确认6位数字密码:")
        if not (len(password) == len(password1) == 6
                and password.isdigit() and password1.isdigit()):
            print('\n格式错误,重新输入')
        elif password != password1:
            print('\n两次密码不一致,重新输入')
        else:
            break

    # Main loop: encrypt every file that contains at least one private value.
    for fp in fp_generator():
        values = get_values(fp)
        try:
            found = any(is_private(value) for value in values)
        finally:
            # Close the reader before encrypting, so the file is not still
            # held by a suspended generator while it is being rewritten.
            values.close()
        if found:
            encryption(fp, password)
**
**
大家可以创建几个包含真实手机号、身份证号的 Word/Excel 文件,运行程序看看吧。
都看到这儿了,麻烦点个赞呀!