python----使用re正则表达式筛选数据,去重,列表,取特定行数据,TXT文件保存、汉字编码处理(适用于分行显示的数据提取)

python----使用re正则表达式筛选数据,去重,列表,取特定行数据,TXT文件保存、汉字编码处理(适用于分行显示的数据提取)

环境配置:对目标服务器(192.168.4.28)上的日志文件筛选特定数据,日志文件路径:
/usr/local/PMSCORPService/logs/stdout.log

python脚本必须在该服务器上运行

1、筛选银行卡相关数据

python代码:

[root@localhost tmp]# ls
findbankno.py  findemail.py  findidno.py  findmemberid.py  findmobileid.py  finduserid.py
[root@localhost tmp]# cat findbankno.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print every distinct log line that mentions ``ReceiveAccount``.

The whole log at LOG_PATH is read and decoded as UTF-8, and each matching
line is printed once. De-duplication uses a set, so the output order is
arbitrary.
"""
import codecs
import re

LOG_PATH = "/usr/local/PMSCORPService/logs/stdout.log"

# ".+" never crosses a newline, so each match is one complete log line that
# has at least one character on both sides of the keyword.
LINE_PATTERN = re.compile(u"(.+ReceiveAccount.+)")

# The context manager closes the log file even if reading or decoding fails.
with codecs.open(LOG_PATH, "r", encoding="utf-8") as log_file:
    log_text = log_file.read()

# findall() returns the single capture group, i.e. the whole matching line.
unique_lines = set(LINE_PATTERN.findall(log_text))

for line in unique_lines:
    print(line)
[root@localhost tmp]# 
运行情况:
[root@localhost tmp]# python findbankno.py 
2018-06-14 17:16:22,005 [DubboServerHandler-192.168.4.28:12544-thread-499] [INFO]-[com.*****.*****.*****.*****.service.withdraw.WithdrawService confirmWithDrawDomesticBank 738]-TransferToBankService confirmWithDrawDomesticBank,TransferService.payMember MemberId ====request====*****@163.comTransferService ReceiveAccount:************6246
[root@localhost tmp]# 

2、筛选email邮箱地址

python代码:
[root@localhost tmp]# cat findemail.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print every distinct log line that contains the text ``email:``.

The whole log at LOG_PATH is read and decoded as UTF-8, and each matching
line is printed once. De-duplication uses a set, so the output order is
arbitrary.
"""
import codecs
import re

LOG_PATH = "/usr/local/PMSCORPService/logs/stdout.log"

# ".+" never crosses a newline, so each match is one complete log line that
# has at least one character on both sides of "email:".
LINE_PATTERN = re.compile(u"(.+email:.+)")

# The context manager closes the log file even if reading or decoding fails.
with codecs.open(LOG_PATH, "r", encoding="utf-8") as log_file:
    log_text = log_file.read()

# findall() returns the single capture group, i.e. the whole matching line.
unique_lines = set(LINE_PATTERN.findall(log_text))

for line in unique_lines:
    print(line)
[root@localhost tmp]# 
运行情况:
[root@localhost tmp]# python findemail.py 
2018-06-14 17:04:50,518 [DubboServerHandler-192.168.4.28:12544-thread-500] [INFO]-[com.*****.*****.*****.*****.service.common.EmailSendAndValidateService functionEmailCodeDelete 764]-EmailSendAndValidateService functionEmailCodeDelete,memberId:131999*****,email:q******q@sina.com,cachePrefix:pc.*****.*****.functionemailcode.login
2018-06-14 17:05:42,150 [DubboServerHandler-192.168.4.28:12544-thread-498] [INFO]-[com.*****.*****.*****.*****.service.common.EmailSendAndValidateService functionEmailCodeDelete 764]-EmailSendAndValidateService functionEmailCodeDelete,memberId:131999*****,email:q******q@sina.com,cachePrefix:pc.pms.corp.functionemailcode.login

3、筛选身份证号码

python代码:
[root@localhost tmp]# ls
findbankno.py  findemail.py  findidno.py  findmemberid.py  findmobileid.py  finduserid.py
[root@localhost tmp]# cat findidno.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print every distinct log line that mentions ``idNo``.

The whole log at LOG_PATH is read and decoded as UTF-8, and each matching
line is printed once. De-duplication uses a set, so the output order is
arbitrary.
"""
import codecs
import re

LOG_PATH = "/usr/local/PMSCORPService/logs/stdout.log"

# ".+" never crosses a newline, so each match is one complete log line that
# has at least one character on both sides of the keyword.
LINE_PATTERN = re.compile(u"(.+idNo.+)")

# The context manager closes the log file even if reading or decoding fails.
with codecs.open(LOG_PATH, "r", encoding="utf-8") as log_file:
    log_text = log_file.read()

# findall() returns the single capture group, i.e. the whole matching line.
unique_lines = set(LINE_PATTERN.findall(log_text))

for line in unique_lines:
    print(line)

[root@localhost tmp]# 
脚本运行情况:
[root@localhost tmp]# python findidno.py 
2018-06-14 17:09:04,441 [DubboServerHandler-192.168.4.28:12544-thread-499] [INFO]-[com.*****.*****.*****.*****.service.approve.ApproveService processAccountAuthByCSP 1095]-ApproveService processAccountAuthByCSP  csp  Approve requestParam cardNo: ***************2687,cardName:陈*荣,channelCode:ECT_ACCOUNT_AUTH,mobileNo:131*****066,idNo:450************553
2018-06-14 17:09:15,546 [DubboServerHandler-192.168.4.28:12544-thread-499] [INFO]-[com.*****.*****.*****.*****.service.approve.ApproveService processAccountAuthByCSP 1095]-ApproveService processAccountAuthByCSP  csp  Approve requestParam cardNo: ***************2687,cardName:陈*荣,channelCode:ECT_ACCOUNT_AUTH,mobileNo:131*****066,idNo:450************553

4、筛选Parameters、memberId、payMemberId值

python代码:
[root@localhost tmp]# ls
findbankno.py  findemail.py  findidno.py  findmemberid.py  findmobileid.py  finduserid.py
[root@localhost tmp]# cat findmemberid.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print distinct log lines mentioning Parameters, memberId or payMemberId.

The whole log at LOG_PATH is read and decoded as UTF-8. The keywords are
processed one after another: for each keyword, every matching line is
printed once. De-duplication is per keyword and uses a set, so the order
within a keyword's output is arbitrary, and a line that matches several
keywords is printed once for each of them.
"""
import codecs
import re

LOG_PATH = "/usr/local/PMSCORPService/logs/stdout.log"

# One pattern per keyword, in output order. ".+" never crosses a newline, so
# each match is a complete log line with at least one character on both sides
# of the keyword. Matching is case-sensitive.
LINE_PATTERNS = (
    u"(.+Parameters.+)",
    u"(.+memberId.+)",
    u"(.+payMemberId.+)",
)


def find_unique_lines(text, pattern):
    """Return the set of distinct lines in ``text`` matched by ``pattern``.

    ``pattern`` must contain exactly one capture group wrapping the whole
    line, so that ``re.findall`` yields the full line for each match.
    """
    return set(re.findall(pattern, text))


# The context manager closes the log file even if reading or decoding fails.
with codecs.open(LOG_PATH, "r", encoding="utf-8") as log_file:
    log_text = log_file.read()

for line_pattern in LINE_PATTERNS:
    for line in find_unique_lines(log_text, line_pattern):
        print(line)

[root@localhost tmp]# 
运行情况:
2018-06-14 16:45:36,907 [DubboServerHandler-192.168.4.28:12544-thread-500] [DEBUG]-[com.*****.*****.*****.jakarta.JakartaCommonsLoggingImpl debug 27]-{pstm-446016} Parameters: [131680*****]
2018-06-14 17:04:38,110 [DubboServerHandler-192.168.4.28:12544-thread-500] [INFO]-[com.*****.*****.*****.*****.service.common.PasswordValidateService loginPasswordErrorTimes 325]-PasswordValidate loginPasswordErrorTimes  start memberId = q489*****@sina.com
2018-06-14 16:43:36,905 [DubboServerHandler-192.168.4.28:12544-thread-500] [INFO]-[com.*****.*****.*****.*****.service.common.PasswordValidateService loginPasswordErrorTimesRemove 366]-PasswordValidate loginPasswordErrorTimesRemove , memberId:q489*****@sina.com

5、筛选手机号码

python代码:
[root@localhost tmp]# ls
findbankno.py  findemail.py  findidno.py  findmemberid.py  findmobileid.py  finduserid.py
[root@localhost tmp]# cat findmobileid.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print every distinct log line that mentions ``mobileNo``.

The whole log at LOG_PATH is read and decoded as UTF-8, and each matching
line is printed once. De-duplication uses a set, so the output order is
arbitrary.
"""
import codecs
import re

LOG_PATH = "/usr/local/PMSCORPService/logs/stdout.log"

# ".+" never crosses a newline, so each match is one complete log line that
# has at least one character on both sides of the keyword.
LINE_PATTERN = re.compile(u"(.+mobileNo.+)")

# The context manager closes the log file even if reading or decoding fails.
with codecs.open(LOG_PATH, "r", encoding="utf-8") as log_file:
    log_text = log_file.read()

# findall() returns the single capture group, i.e. the whole matching line.
unique_lines = set(LINE_PATTERN.findall(log_text))

for line in unique_lines:
    print(line)
[root@localhost tmp]# 
运行情况:
[root@localhost tmp]# python findmobileid.py 
2018-06-14 16:49:03,640 [DubboServerHandler-192.168.4.28:12544-thread-500] [INFO]-[com.*****.*****.*****.*****.service.common.SmsSendAndValidateService commonSmsSend 1274]-SmsSendAndValidateService commonSmsSend,mobileNo:861*******896,msgTemplateId:1054,language:zh
2018-06-14 17:15:22,599 [DubboServerHandler-192.168.4.28:12544-thread-499] [INFO]-[com.*****.*****.*****.*****.service.common.SmsSendAndValidateService registerSmsCodeValidate 1093]-SmsSendAndValidateService registerSmsCodeValidate getregisterSmsCodeErrorTimes, mobileNo861*******022registerSmsCodeErrorTimes:0
2018-06-14 16:59:10,358 [DubboServerHandler-192.168.4.28:12544-thread-500] [INFO]-[com.*****.*****.*****.*****.service.common.SmsSendAndValidateService commonSmsSend 1274]-SmsSendAndValidateService commonSmsSend,mobileNo:861*******448,msgTemplateId:3026,language:zh

6、筛选user id(accountId)值信息

python代码:
[root@localhost tmp]# ls
findbankno.py  findemail.py  findidno.py  findmemberid.py  findmobileid.py  finduserid.py
[root@localhost tmp]# cat finduserid.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print every distinct log line that mentions ``accountId``.

The whole log at LOG_PATH is read and decoded as UTF-8, and each matching
line is printed once. De-duplication uses a set, so the output order is
arbitrary.
"""
import codecs
import re

LOG_PATH = "/usr/local/PMSCORPService/logs/stdout.log"

# ".+" never crosses a newline, so each match is one complete log line that
# has at least one character on both sides of the keyword.
LINE_PATTERN = re.compile(u"(.+accountId.+)")

# The context manager closes the log file even if reading or decoding fails.
with codecs.open(LOG_PATH, "r", encoding="utf-8") as log_file:
    log_text = log_file.read()

# findall() returns the single capture group, i.e. the whole matching line.
unique_lines = set(LINE_PATTERN.findall(log_text))

for line in unique_lines:
    print(line)
[root@localhost tmp]# 
代码运行:
[root@localhost tmp]# python finduserid.py 
2018-06-14 17:16:21,912 [DubboServerHandler-192.168.4.28:12544-thread-499] [INFO]-[com.*****.*****.*****.*****.service.transfer.WithdrawTransferCommonService getDrawTransferConfig 390]-MerchantTransferInfoDao gerTransferConfig,accountId:55555*****,transferType:2
2018-06-14 17:16:04,542 [DubboServerHandler-192.168.4.28:12544-thread-499] [INFO]-[com.*****.*****.*****.*****.service.common.IPService isRegularIP 91]-IPService isRegularIP ,accountId:5555555566,memberId:*****@163.com,IP:192.168.40.36
2018-06-14 16:58:32,748 [DubboServerHandler-192.168.4.28:12544-thread-500] [INFO]-[com.com.*****.*****.*****.*****..member.impl.ScAccProotDao getECardLinesCount 37]-ScAccProotDao.getECardLinesCount:map:{startTime=2017-06-14, accountId=35700*****, endTime=2018-06-14 23:59:59}

7、文件保存
参考:https://blog.csdn.net/miaoqiucheng/article/details/73122111?locationNum=1&fps=1

[root@localhost tmp]# ls -la
total 308
drwxrwxrwt.  2 root root   4096 Jun 21 16:41 .
drwxr-xr-x. 17 root root   4096 Apr 16  2013 ..
-rw-r--r--.  1 root root    932 Jun 21 16:41 findbankno.py
-rw-r--r--.  1 root root   1337 Jun 21 12:23 findbankno.txt
-rw-r--r--.  1 root root    694 Jun 21 12:24 findemail.py
-rw-r--r--.  1 root root   4483 Jun 21 12:26 findemail.txt
-rw-r--r--.  1 root root    693 Jun 21 14:26 findidno.py
-rw-r--r--.  1 root root  19324 Jun 21 14:28 findidno.txt
-rw-r--r--.  1 root root   1062 Jun 21 14:32 findmemberid.py
-rw-r--r--.  1 root root    692 Jun 21 14:31 findmobileid.py
-rw-r--r--.  1 root root  74091 Jun 21 14:33 findmobile.txt
-rw-r--r--.  1 root root    693 Jun 21 14:31 finduserid.py
-rw-r--r--.  1 root root 168505 Jun 21 14:35 finduserid.txt
[root@localhost tmp]# cat findbankno.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Save every distinct log line that mentions ``ReceiveAccount`` to a file.

The whole log at LOG_PATH is read and decoded as UTF-8. Each matching line
is written once to OUTPUT_PATH as UTF-8, one line per record. Any previous
content of OUTPUT_PATH is discarded. De-duplication uses a set, so the order
of the saved lines is arbitrary.
"""
import codecs
import re

LOG_PATH = "/usr/local/PMSCORPService/logs/stdout.log"
OUTPUT_PATH = "/usr/tmp/findbankid.txt"

# ".+" never crosses a newline, so each match is one complete log line that
# has at least one character on both sides of the keyword.
LINE_PATTERN = re.compile(u"(.+ReceiveAccount.+)")

# The context manager closes the log file even if reading or decoding fails.
with codecs.open(LOG_PATH, "r", encoding="utf-8") as log_file:
    log_text = log_file.read()

# findall() returns the single capture group, i.e. the whole matching line.
unique_lines = set(LINE_PATTERN.findall(log_text))

# Mode "w" truncates earlier results. The file is opened once for the whole
# run instead of being reopened for every line; codecs handles the UTF-8
# encoding of non-ASCII (e.g. Chinese) characters.
with codecs.open(OUTPUT_PATH, "w", encoding="utf-8") as out_file:
    # The output begins with a single blank line.
    out_file.write(u"\n")
    for line in unique_lines:
        out_file.write(line + u"\n")
[root@localhost tmp]# 
保存的文件
[root@localhost tmp]# cat findbankno.txt 

2018-06-20 12:03:37,488 [DubboServerHandler-192.168.4.28:12544-thread-500] [INFO]-[com.*****.******.******.corp.service.withdraw.WithdrawService confirmWithDrawDomesticBank 738]-TransferToBankService confirmWithDrawDomesticBank,TransferService.payMember MemberId ====request====137****1580TransferService ReceiveAccount:************3509
2018-06-21 10:51:56,287 [DubboServerHandler-192.168.4.28:12544-thread-245] [INFO]-[com.*****.******.******.corp.service.withdraw.WithdrawService confirmWithDrawDomesticBank 738]-TransferToBankService confirmWithDrawDomesticBank,TransferService.payMember MemberId ====request====ga****@163.comTransferService ReceiveAccount:************6246
2018-06-21 10:50:37,794 [DubboServerHandler-192.168.4.28:12544-thread-204] [INFO]-[com.*****.******.******.corp.service.withdraw.WithdrawService confirmWithDrawDomesticBank 738]-TransferToBankService confirmWithDrawDomesticBank,TransferService.payMember MemberId ====request====ga****163.comTransferService ReceiveAccount:************6246
2018-06-20 12:04:22,176 [DubboServerHandler-192.168.4.28:12544-thread-498] [INFO]-[com.*****.******.******.corp.service.withdraw.WithdrawService confirmWithDrawDomesticBank 738]-TransferToBankService confirmWithDrawDomesticBank,TransferService.payMember MemberId ====request====137*****1580TransferService ReceiveAccount:************3509
[root@localhost tmp]# 

你可能感兴趣的:(python)