用 Python 获取邮件内容比较简单,直接用现成的 IMAP 或 POP3 库即可。但有时邮件正文不是纯文本(plain text),而是 HTML,甚至只是一个 URL 链接,原本的人工操作流程是点击该 URL 来获取内容(比如 CSV 文件等)。
我这边的需求是从html里的众多url中找到包含所需文件的url并将文件的内容保存到本地。
上代码:
# -*- encoding: utf-8 -*-
import getpass, email, sys
import base64
import datetime
import re
from imapclient import IMAPClient
import requests
import time
import sys
# Python 2-only idiom: reload(sys) re-exposes sys.setdefaultencoding so the
# process-wide default codec can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
# Today's date as YYYY-MM-DD; get_text() uses it as the output file name.
today = time.strftime("%Y-%m-%d")
# Today's date as DD-Mon-YYYY; func() passes it to the IMAP SINCE search.
today_s = str(time.strftime("%d-%b-%Y"))
# Configure the IMAP client (created at import time and shared by func()).
server = IMAPClient(host='***.com', ssl=True)
def func():
    """Download the linked file of every matching mail received today.

    Logs in on the module-level ``server``, opens INBOX read-only, searches
    for messages whose text contains the configured string and that arrived
    since ``today_s``, then hands the URL extracted from each message to
    ``get_text``.

    Exits the process with status 1 when the login fails. The IMAP session
    is always logged out once login has succeeded, even if processing a
    message raises.
    """
    try:
        server.login('***.com', '***')
    except server.Error:
        print('Could not log in')
        sys.exit(1)

    try:
        # Select the inbox in read-only mode.
        server.select_folder('INBOX', readonly=True)
        # Filter the inbox by text content and by date.
        messages = server.search(['TEXT', '***.com', 'SINCE', today_s])
        # fetch() returns a mapping of message id -> fetched message data.
        msgdict = server.fetch(messages, ['BODY.PEEK[]'])
        for message in msgdict.values():
            url = get_url(message)
            if not url:
                # Nothing to download for this message.
                continue
            get_text(url)
    finally:
        # Release the server-side session instead of leaking the connection.
        server.logout()
def get_url(message):
    """Return the longest link found in the mail's text parts, or None.

    ``message`` is one fetched message mapping whose ``'BODY[]'`` entry holds
    the raw RFC 822 text. Every text part is decoded and scanned for
    ``href="..."`` attributes; the longest URL wins (the first one on ties).

    NOTE(review): the original regular expression was truncated in the
    source (only the "keep the longest match" loop survived), so the href
    pattern below is a reconstruction -- confirm it against the real mails.
    """
    e = email.message_from_string(message['BODY[]'])  # build a Message object

    urls = []
    for part in e.walk():
        # Multipart containers and attachments carry no HTML to scan.
        if part.get_content_maintype() != 'text':
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        if not isinstance(payload, str):
            # Python 3 returns bytes here; Python 2 already returns str.
            charset = part.get_content_charset() or 'utf-8'
            payload = payload.decode(charset, 'replace')
        # Collect every link target found in the markup.
        urls.extend(
            re.findall(r'href\s*=\s*["\']([^"\']+)["\']', payload, re.I))

    if not urls:
        return None
    # max() keeps the first of equally long candidates, like a strict ">".
    return max(urls, key=len)
def get_yes():
    """Return yesterday's date as a ``datetime.date``."""
    return datetime.date.today() - datetime.timedelta(days=1)
def get_text(url):
    """Download ``url`` and append its parsed rows to ``data/<today>``.

    The raw response body is saved to ``./temp`` and read back as text. The
    first three lines are skipped; every later tab-separated line with at
    least four columns produces one output row::

        <column 0> TAB <column 3 without commas> TAB <yesterday's date>

    NUL characters are stripped from each row. Lines with fewer than four
    columns are skipped (the original indexed column 3 after checking only
    for two columns and raised IndexError). The output file is opened once,
    and only when there is something to write.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            an error page is never parsed as data.
    """
    import os

    # Fetch the content behind the URL.
    response = requests.get(url)
    response.raise_for_status()

    # Save the downloaded file locally, then read it back as text lines.
    with open("./temp", "wb") as raw_file:
        raw_file.write(response.content)
    with open("./temp", "r") as raw_file:
        lines = raw_file.readlines()

    yesterday = str(get_yes())  # identical for every row, so compute once
    rows = []
    for line in lines[3:]:
        fields = line.strip('\n').split('\t')
        if len(fields) < 4:
            continue
        row = '\t'.join((fields[0], fields[3].replace(",", ""), yesterday))
        rows.append(row.replace('\0', "") + '\n')

    if not rows:
        return
    if not os.path.isdir('data'):
        os.makedirs('data')
    with open('data/' + today, "a") as f_res:
        f_res.writelines(rows)
# Run the mail-download workflow only when executed as a script.
if __name__ == '__main__':
    func()
代码很简单,不过需要提前在邮箱设置中开启 IMAP 服务。