Python——使用re正则表达式筛选数据:去重、列表、取特定行数据、TXT文件保存、汉字编码处理(适用于分行显示的数据提取)
环境配置:对目标服务器(192.168.4.28)上的日志文件筛选特定数据,日志文件路径如下:
/usr/local/PMSCORPService/logs/stdout.log
python脚本必须在该服务器上运行
1、筛选银行卡相关数据
python代码:
[root@localhost tmp]# ls
findbankno.py findemail.py findidno.py findmemberid.py findmobileid.py finduserid.py
[root@localhost tmp]# cat findbankno.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print every distinct log line that mentions ReceiveAccount.

The log path is hard-coded, so the script has to run on the host that
holds /usr/local/PMSCORPService/logs/stdout.log.
"""
import re

LOG_PATH = "/usr/local/PMSCORPService/logs/stdout.log"
# ".+" on both sides of the keyword stretches the match over the whole line
# ("." never crosses a newline), so each hit is one complete log line.
FINDWORD = u"(.+ReceiveAccount.+)"


def unique_matches(text, findword):
    """Return all matches of ``findword`` in ``text`` with duplicates removed.

    The first occurrence of each match is kept, so the result follows the
    order of the log rather than the arbitrary iteration order of a set.
    """
    seen = set()
    ordered = []
    for hit in re.compile(findword).findall(text):
        if hit not in seen:
            seen.add(hit)
            ordered.append(hit)
    return ordered


def main():
    # Read bytes and decode explicitly: this behaves the same on Python 2
    # and Python 3, and "with" closes the handle even if decoding fails.
    with open(LOG_PATH, "rb") as log:
        temp = log.read().decode("utf8")
    for line in unique_matches(temp, FINDWORD):
        print(line)


if __name__ == "__main__":
    main()
[root@localhost tmp]#
运行情况:
[root@localhost tmp]# python findbankno.py
2018-06-14 17:16:22,005 [DubboServerHandler-192.168.4.28:12544-thread-499] [INFO]-[com.*****.*****.*****.*****.service.withdraw.WithdrawService confirmWithDrawDomesticBank 738]-TransferToBankService confirmWithDrawDomesticBank,TransferService.payMember MemberId ====request====*****@163.comTransferService ReceiveAccount:************6246
[root@localhost tmp]#
2、筛选email邮箱地址
python代码:
[root@localhost tmp]# cat findemail.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print every distinct log line that contains the text "email:".

The log path is hard-coded, so the script has to run on the host that
holds /usr/local/PMSCORPService/logs/stdout.log.
"""
import re

LOG_PATH = "/usr/local/PMSCORPService/logs/stdout.log"
# The surrounding ".+" extends the match to the full line holding "email:".
FINDWORD = u"(.+email:.+)"


def unique_matches(text, findword):
    """Return all matches of ``findword`` in ``text`` with duplicates removed.

    Matches are returned in order of first appearance, which keeps the
    output in the same order as the log.
    """
    seen = set()
    ordered = []
    for hit in re.compile(findword).findall(text):
        if hit in seen:
            continue
        seen.add(hit)
        ordered.append(hit)
    return ordered


def main():
    # Bytes + explicit decode works on both Python 2 and 3; the context
    # manager guarantees the log file is closed.
    with open(LOG_PATH, "rb") as log:
        temp = log.read().decode("utf8")
    for line in unique_matches(temp, FINDWORD):
        print(line)


if __name__ == "__main__":
    main()
[root@localhost tmp]#
运行情况:
[root@localhost tmp]# python findemail.py
2018-06-14 17:04:50,518 [DubboServerHandler-192.168.4.28:12544-thread-500] [INFO]-[com.*****.*****.*****.*****.service.common.EmailSendAndValidateService functionEmailCodeDelete 764]-EmailSendAndValidateService functionEmailCodeDelete,memberId:131999*****,email:q******q@sina.com,cachePrefix:pc.*****.*****.functionemailcode.login
2018-06-14 17:05:42,150 [DubboServerHandler-192.168.4.28:12544-thread-498] [INFO]-[com.*****.*****.*****.*****.service.common.EmailSendAndValidateService functionEmailCodeDelete 764]-EmailSendAndValidateService functionEmailCodeDelete,memberId:131999*****,email:q******q@sina.com,cachePrefix:pc.pms.corp.functionemailcode.login
3、筛选身份证号码
python代码:
[root@localhost tmp]# ls
findbankno.py findemail.py findidno.py findmemberid.py findmobileid.py finduserid.py
[root@localhost tmp]# cat findidno.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print every distinct log line that contains the text "idNo".

The log path is hard-coded, so the script has to run on the host that
holds /usr/local/PMSCORPService/logs/stdout.log.
"""
import re

LOG_PATH = "/usr/local/PMSCORPService/logs/stdout.log"
# The surrounding ".+" extends the match to the full line holding "idNo".
FINDWORD = u"(.+idNo.+)"


def unique_matches(text, findword):
    """Return all matches of ``findword`` in ``text`` with duplicates removed.

    Matches are returned in order of first appearance, which keeps the
    output in the same order as the log.
    """
    seen = set()
    ordered = []
    for hit in re.compile(findword).findall(text):
        if hit not in seen:
            seen.add(hit)
            ordered.append(hit)
    return ordered


def main():
    # The log is decoded to unicode up front so non-ASCII text in the
    # matched lines is handled as characters rather than raw bytes.
    with open(LOG_PATH, "rb") as log:
        temp = log.read().decode("utf8")
    for line in unique_matches(temp, FINDWORD):
        print(line)


if __name__ == "__main__":
    main()
[root@localhost tmp]#
脚本运行情况:
[root@localhost tmp]# python findidno.py
2018-06-14 17:09:04,441 [DubboServerHandler-192.168.4.28:12544-thread-499] [INFO]-[com.*****.*****.*****.*****.service.approve.ApproveService processAccountAuthByCSP 1095]-ApproveService processAccountAuthByCSP csp Approve requestParam cardNo: ***************2687,cardName:陈*荣,channelCode:ECT_ACCOUNT_AUTH,mobileNo:131*****066,idNo:450************553
2018-06-14 17:09:15,546 [DubboServerHandler-192.168.4.28:12544-thread-499] [INFO]-[com.*****.*****.*****.*****.service.approve.ApproveService processAccountAuthByCSP 1095]-ApproveService processAccountAuthByCSP csp Approve requestParam cardNo: ***************2687,cardName:陈*荣,channelCode:ECT_ACCOUNT_AUTH,mobileNo:131*****066,idNo:450************553
4、筛选Parameters、member id、payMemberId值
python代码:
[root@localhost tmp]# ls
findbankno.py findemail.py findidno.py findmemberid.py findmobileid.py finduserid.py
[root@localhost tmp]# cat findmemberid.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print the distinct log lines mentioning Parameters, memberId or payMemberId.

The three keywords are searched one after another; duplicates are removed
within each keyword's results, not across keywords. The log path is
hard-coded, so the script has to run on the host that holds
/usr/local/PMSCORPService/logs/stdout.log.
"""
import re

LOG_PATH = "/usr/local/PMSCORPService/logs/stdout.log"
# Each pattern captures the whole line that contains the keyword.
# Matching is case-sensitive, so "memberId" does not hit "payMemberId".
FINDWORDS = (
    u"(.+Parameters.+)",
    u"(.+memberId.+)",
    u"(.+payMemberId.+)",
)


def unique_matches(text, findword):
    """Return all matches of ``findword`` in ``text`` with duplicates removed.

    Matches are returned in order of first appearance, which keeps the
    output in the same order as the log.
    """
    seen = set()
    ordered = []
    for hit in re.compile(findword).findall(text):
        if hit not in seen:
            seen.add(hit)
            ordered.append(hit)
    return ordered


def main():
    # Read once, then run every pattern over the same decoded text.
    with open(LOG_PATH, "rb") as log:
        temp = log.read().decode("utf8")
    for findword in FINDWORDS:
        for line in unique_matches(temp, findword):
            print(line)


if __name__ == "__main__":
    main()
[root@localhost tmp]#
运行情况:
2018-06-14 16:45:36,907 [DubboServerHandler-192.168.4.28:12544-thread-500] [DEBUG]-[com.*****.*****.*****.jakarta.JakartaCommonsLoggingImpl debug 27]-{pstm-446016} Parameters: [131680*****]
2018-06-14 17:04:38,110 [DubboServerHandler-192.168.4.28:12544-thread-500] [INFO]-[com.*****.*****.*****.*****.service.common.PasswordValidateService loginPasswordErrorTimes 325]-PasswordValidate loginPasswordErrorTimes start memberId = q489*****@sina.com
2018-06-14 16:43:36,905 [DubboServerHandler-192.168.4.28:12544-thread-500] [INFO]-[com.*****.*****.*****.*****.service.common.PasswordValidateService loginPasswordErrorTimesRemove 366]-PasswordValidate loginPasswordErrorTimesRemove , memberId:q489*****@sina.com
5、筛选手机号码
python代码:
[root@localhost tmp]# ls
findbankno.py findemail.py findidno.py findmemberid.py findmobileid.py finduserid.py
[root@localhost tmp]# cat findmobileid.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print every distinct log line that contains the text "mobileNo".

The log path is hard-coded, so the script has to run on the host that
holds /usr/local/PMSCORPService/logs/stdout.log.
"""
import re

LOG_PATH = "/usr/local/PMSCORPService/logs/stdout.log"
# The surrounding ".+" extends the match to the full line holding "mobileNo".
FINDWORD = u"(.+mobileNo.+)"


def unique_matches(text, findword):
    """Return all matches of ``findword`` in ``text`` with duplicates removed.

    Matches are returned in order of first appearance, which keeps the
    output in the same order as the log.
    """
    seen = set()
    ordered = []
    for hit in re.compile(findword).findall(text):
        if hit in seen:
            continue
        seen.add(hit)
        ordered.append(hit)
    return ordered


def main():
    # Bytes + explicit decode works on both Python 2 and 3; the context
    # manager guarantees the log file is closed.
    with open(LOG_PATH, "rb") as log:
        temp = log.read().decode("utf8")
    for line in unique_matches(temp, FINDWORD):
        print(line)


if __name__ == "__main__":
    main()
[root@localhost tmp]#
运行情况:
[root@localhost tmp]# python findmobileid.py
2018-06-14 16:49:03,640 [DubboServerHandler-192.168.4.28:12544-thread-500] [INFO]-[com.*****.*****.*****.*****.service.common.SmsSendAndValidateService commonSmsSend 1274]-SmsSendAndValidateService commonSmsSend,mobileNo:861*******896,msgTemplateId:1054,language:zh
2018-06-14 17:15:22,599 [DubboServerHandler-192.168.4.28:12544-thread-499] [INFO]-[com.*****.*****.*****.*****.service.common.SmsSendAndValidateService registerSmsCodeValidate 1093]-SmsSendAndValidateService registerSmsCodeValidate getregisterSmsCodeErrorTimes, mobileNo861*******022registerSmsCodeErrorTimes:0
2018-06-14 16:59:10,358 [DubboServerHandler-192.168.4.28:12544-thread-500] [INFO]-[com.*****.*****.*****.*****.service.common.SmsSendAndValidateService commonSmsSend 1274]-SmsSendAndValidateService commonSmsSend,mobileNo:861*******448,msgTemplateId:3026,language:zh
6、筛选user id值信息
python代码:
[root@localhost tmp]# ls
findbankno.py findemail.py findidno.py findmemberid.py findmobileid.py finduserid.py
[root@localhost tmp]# cat finduserid.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print every distinct log line that contains the text "accountId".

The log path is hard-coded, so the script has to run on the host that
holds /usr/local/PMSCORPService/logs/stdout.log.
"""
import re

LOG_PATH = "/usr/local/PMSCORPService/logs/stdout.log"
# The surrounding ".+" extends the match to the full line holding "accountId".
FINDWORD = u"(.+accountId.+)"


def unique_matches(text, findword):
    """Return all matches of ``findword`` in ``text`` with duplicates removed.

    Matches are returned in order of first appearance, which keeps the
    output in the same order as the log.
    """
    seen = set()
    ordered = []
    for hit in re.compile(findword).findall(text):
        if hit not in seen:
            seen.add(hit)
            ordered.append(hit)
    return ordered


def main():
    # Bytes + explicit decode works on both Python 2 and 3; the context
    # manager guarantees the log file is closed.
    with open(LOG_PATH, "rb") as log:
        temp = log.read().decode("utf8")
    for line in unique_matches(temp, FINDWORD):
        print(line)


if __name__ == "__main__":
    main()
[root@localhost tmp]#
代码运行:
[root@localhost tmp]# python finduserid.py
2018-06-14 17:16:21,912 [DubboServerHandler-192.168.4.28:12544-thread-499] [INFO]-[com.*****.*****.*****.*****.service.transfer.WithdrawTransferCommonService getDrawTransferConfig 390]-MerchantTransferInfoDao gerTransferConfig,accountId:55555*****,transferType:2
2018-06-14 17:16:04,542 [DubboServerHandler-192.168.4.28:12544-thread-499] [INFO]-[com.*****.*****.*****.*****.service.common.IPService isRegularIP 91]-IPService isRegularIP ,accountId:5555555566,memberId:*****@163.com,IP:192.168.40.36
2018-06-14 16:58:32,748 [DubboServerHandler-192.168.4.28:12544-thread-500] [INFO]-[com.com.*****.*****.*****.*****..member.impl.ScAccProotDao getECardLinesCount 37]-ScAccProotDao.getECardLinesCount:map:{startTime=2017-06-14, accountId=35700*****, endTime=2018-06-14 23:59:59}
7、文件保存
参考:https://blog.csdn.net/miaoqiucheng/article/details/73122111?locationNum=1&fps=1
[root@localhost tmp]# ls -la
total 308
drwxrwxrwt. 2 root root 4096 Jun 21 16:41 .
drwxr-xr-x. 17 root root 4096 Apr 16 2013 ..
-rw-r--r--. 1 root root 932 Jun 21 16:41 findbankno.py
-rw-r--r--. 1 root root 1337 Jun 21 12:23 findbankno.txt
-rw-r--r--. 1 root root 694 Jun 21 12:24 findemail.py
-rw-r--r--. 1 root root 4483 Jun 21 12:26 findemail.txt
-rw-r--r--. 1 root root 693 Jun 21 14:26 findidno.py
-rw-r--r--. 1 root root 19324 Jun 21 14:28 findidno.txt
-rw-r--r--. 1 root root 1062 Jun 21 14:32 findmemberid.py
-rw-r--r--. 1 root root 692 Jun 21 14:31 findmobileid.py
-rw-r--r--. 1 root root 74091 Jun 21 14:33 findmobile.txt
-rw-r--r--. 1 root root 693 Jun 21 14:31 finduserid.py
-rw-r--r--. 1 root root 168505 Jun 21 14:35 finduserid.txt
[root@localhost tmp]# cat findbankno.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Save every distinct log line that mentions ReceiveAccount to a text file.

The log path is hard-coded, so the script has to run on the host that
holds /usr/local/PMSCORPService/logs/stdout.log. The output file is
overwritten on every run.
"""
import re
import codecs  # encoding-aware file writer, needed for non-ASCII text

LOG_PATH = "/usr/local/PMSCORPService/logs/stdout.log"
# ".+" on both sides of the keyword stretches the match over the whole line.
FINDWORD = u"(.+ReceiveAccount.+)"
# Destination file for the extracted lines.
FILEWRITE = "/usr/tmp/findbankid.txt"


def unique_matches(text, findword):
    """Return all matches of ``findword`` in ``text`` with duplicates removed.

    Matches are returned in order of first appearance, which keeps the
    output in the same order as the log.
    """
    seen = set()
    ordered = []
    for hit in re.compile(findword).findall(text):
        if hit not in seen:
            seen.add(hit)
            ordered.append(hit)
    return ordered


def write_lines(lines, filewrite):
    """Overwrite ``filewrite`` with ``lines`` encoded as UTF-8.

    The file starts with a single empty line and every entry is followed
    by a newline.
    """
    # Mode 'w' truncates existing content; opening once replaces the old
    # open/append/close cycle that ran for every single line.
    with codecs.open(filewrite, 'w', encoding='utf-8') as out:
        out.write(u'\n')
        for line in lines:
            out.write(line)
            out.write(u'\n')


def main():
    # Bytes + explicit decode works on both Python 2 and 3; the context
    # manager guarantees the log file is closed.
    with open(LOG_PATH, "rb") as log:
        temp = log.read().decode("utf8")
    write_lines(unique_matches(temp, FINDWORD), FILEWRITE)


if __name__ == "__main__":
    main()
[root@localhost tmp]#
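保存的文件(注意:上面的脚本写入的是 /usr/tmp/findbankid.txt,而此处查看的是当前目录下的 findbankno.txt,两者文件名不一致,请以脚本中 filewrite 指定的实际路径为准):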
[root@localhost tmp]# cat findbankno.txt
2018-06-20 12:03:37,488 [DubboServerHandler-192.168.4.28:12544-thread-500] [INFO]-[com.*****.******.******.corp.service.withdraw.WithdrawService confirmWithDrawDomesticBank 738]-TransferToBankService confirmWithDrawDomesticBank,TransferService.payMember MemberId ====request====137****1580TransferService ReceiveAccount:************3509
2018-06-21 10:51:56,287 [DubboServerHandler-192.168.4.28:12544-thread-245] [INFO]-[com.*****.******.******.corp.service.withdraw.WithdrawService confirmWithDrawDomesticBank 738]-TransferToBankService confirmWithDrawDomesticBank,TransferService.payMember MemberId ====request====ga****@163.comTransferService ReceiveAccount:************6246
2018-06-21 10:50:37,794 [DubboServerHandler-192.168.4.28:12544-thread-204] [INFO]-[com.*****.******.******.corp.service.withdraw.WithdrawService confirmWithDrawDomesticBank 738]-TransferToBankService confirmWithDrawDomesticBank,TransferService.payMember MemberId ====request====ga****163.comTransferService ReceiveAccount:************6246
2018-06-20 12:04:22,176 [DubboServerHandler-192.168.4.28:12544-thread-498] [INFO]-[com.*****.******.******.corp.service.withdraw.WithdrawService confirmWithDrawDomesticBank 738]-TransferToBankService confirmWithDrawDomesticBank,TransferService.payMember MemberId ====request====137*****1580TransferService ReceiveAccount:************3509
[root@localhost tmp]#