re.sub共有五个参数。
其中三个必选参数:pattern, repl, string
两个可选参数:count, flags
第一个参数pattern:就是正则式字符串或者正则式对象
第二个参数repl:就是要用什么东西去替换pattern匹配到的字符串
第三个参数string:就是对哪个字符串进行匹配与替换
第四个参数count:最多替换的次数,即只替换前几处匹配;默认为0,表示全部替换
第五个参数flags:正则匹配标志(例如re.I忽略大小写、re.M多行模式),默认为0,表示不使用任何标志
import re
class Desensitization(object):
    """Mask (desensitize) personal data so only a few edge characters stay visible.

    Each ``get_*`` method reads the matching attribute stored by ``__init__``
    and returns a copy whose middle characters are replaced by ``*``.

    Masking is done by position (slicing). The data itself is never used as a
    regular-expression pattern, so values that contain regex metacharacters
    (``+``, ``.``, ``(`` ...) or whose hidden part also occurs elsewhere in
    the string are still masked exactly once, in the right place.
    """

    def __init__(self, email=None, mobile=None, identity_card=None, address=None):
        """Store the raw values; fields that are not needed may stay ``None``."""
        self.email = email
        self.mobile = mobile
        self.identity_card = identity_card
        self.address = address

    @staticmethod
    def _mask_middle(value, keep_head, keep_tail):
        """Return *value* with everything between the kept head and tail starred.

        Args:
            value: The string to mask.
            keep_head: Number of leading characters left visible.
            keep_tail: Number of trailing characters left visible.

        Returns:
            The masked string, same length as *value*. If *value* is too short
            to have a hidden middle, it is returned unchanged.
        """
        hidden_len = len(value) - keep_head - keep_tail
        if hidden_len <= 0:
            return value
        # Index the tail from the front so keep_tail == 0 yields an empty tail
        # (value[-0:] would return the whole string).
        return value[:keep_head] + '*' * hidden_len + value[len(value) - keep_tail:]

    def get_email_complex(self):
        """Mask the local part and the domain of ``self.email`` separately.

        The first 2 characters of the text before ``@`` and the last 2
        characters of the text after ``@`` stay visible; the rest of each side
        is starred and the two sides are re-joined with a single ``@``.

        The address is split on ``@`` and only the first and last pieces are
        used, so an input without ``@`` uses the whole string for both sides.
        """
        mail_parts = self.email.split('@')
        masked_local = self._mask_middle(mail_parts[0], 2, 0)
        masked_domain = self._mask_middle(mail_parts[-1], 0, 2)
        return masked_local + '@' + masked_domain

    def get_email(self):
        """Return ``self.email`` with all but its first 2 and last 2 characters starred."""
        return self._mask_middle(self.email, 2, 2)

    def get_mobile(self):
        """Return ``self.mobile`` with all but its first 3 and last 4 characters starred."""
        return self._mask_middle(self.mobile, 3, 4)

    def get_identity_card(self):
        """Return ``self.identity_card`` with all but its first 3 and last 4 characters starred."""
        return self._mask_middle(self.identity_card, 3, 4)

    def get_address(self):
        """Return ``self.address`` with all but its first 3 and last 3 characters starred."""
        return self._mask_middle(self.address, 3, 3)
if __name__ == "__main__":
    # Demo: mask one sample value for each supported field and print the result.
    print(Desensitization(email='[email protected]').get_email())
    print(Desensitization(mobile='18911112222').get_mobile())
    print(Desensitization(identity_card='123123200001011234').get_identity_card())
    print(Desensitization(address="上海市虹口区某某路某某号123室").get_address())
使用re.sub对文本中的多个关键字同时进行过滤(屏蔽)处理。
# In regular-expression syntax the vertical bar "|" is alternation:
# the pattern matches any one of the alternatives it separates.
def check_filter(keywords, text):
    """Replace every occurrence of any keyword in *text* with ``***``.

    Keywords are matched literally: each one is passed through ``re.escape``
    before being joined with ``|``, so characters such as ``+`` or ``.`` in a
    keyword are not interpreted as regex syntax.

    Args:
        keywords: Iterable of keyword strings to mask. Empty strings are ignored.
        text: The string to filter.

    Returns:
        The filtered string. *text* is returned unchanged when there are no
        non-empty keywords.
    """
    # Longest first: alternation takes the first alternative that matches, so
    # a short keyword listed before a longer one that starts with it would
    # otherwise leave the tail of the longer keyword visible.
    # Empty keywords are dropped because an empty alternative matches at
    # every position in the text.
    escaped = [
        re.escape(word)
        for word in sorted(keywords, key=len, reverse=True)
        if word
    ]
    if not escaped:
        return text
    return re.sub("|".join(escaped), "***", text)
# Demo: mask every listed keyword that appears in a sample sentence.
keywords = (
    "暴力",
    "色情",
    "其他关键字",
)
text = "这句话里不包含暴力,也不包含色情,但是可能包含其他关键字"
print(check_filter(keywords, text))