python 读取邮件内容_python获取邮件内容(邮件内容为html)

用 Python 获取邮件内容比较简单,直接使用现成的 IMAP 或 POP3 库即可。但有时邮件正文不是纯文本(plain text),而是 HTML,甚至只是一个 URL 链接;这种情况下,原本的人工流程是点击该 URL 来获取内容(比如 CSV 文件等)。

我这边的需求是从html里的众多url中找到包含所需文件的url并将文件的内容保存到本地。

上代码:

# -*- encoding: utf-8 -*-

import getpass, email, sys

import base64

import datetime

import re

from imapclient import IMAPClient

import requests

import time

import sys

# Python 2 only: reload(sys) re-exposes sys.setdefaultencoding (site.py removes
# it at interpreter start-up) so implicit str/unicode conversions use UTF-8.
# Python 3 has no reload() builtin and its str type is already Unicode, so the
# hack is skipped there instead of failing with NameError.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')

# Today's date as YYYY-MM-DD; used as the output file name under data/.
today = time.strftime("%Y-%m-%d")

# Today's date as DD-Mon-YYYY; passed as the value of the IMAP SINCE search key.
today_s = time.strftime("%d-%b-%Y")

# Configure the IMAP client (SSL enabled).
server = IMAPClient(host='***.com', ssl=True)

def func():
    """Download the file linked from each of today's matching e-mails.

    Logs in on the module-level ``server``, opens INBOX read-only, searches
    for messages whose text contains the configured string and that arrived
    since ``today_s``, then passes each fetched message to ``get_url`` and
    the resulting URL to ``get_text``.

    The process exits with status 1 when the login is rejected. The IMAP
    session is always logged out afterwards, even if processing a message
    raises.
    """
    try:
        server.login('***.com', '***')
    except server.Error:
        print('Could not log in')
        sys.exit(1)

    try:
        # Select the inbox in read-only mode.
        server.select_folder('INBOX', readonly=True)

        # Filter the inbox by message text and by date.
        messages = server.search(['TEXT', '***.com', 'SINCE', today_s])

        # Fetch the full raw messages; the result maps message id -> data.
        # BODY.PEEK is used so fetching does not set the \Seen flag.
        msgdict = server.fetch(messages, ['BODY.PEEK[]'])

        for message in msgdict.values():
            url = get_url(message)
            if not url:
                # No usable link in this message; nothing to download.
                continue
            get_text(url)
    finally:
        # Release the server-side session instead of leaving it to time out.
        server.logout()

# Pick a URL out of one fetched message and return it.
#
# `message` is indexed with 'BODY[]' and the value is parsed with
# email.message_from_string, so it is expected to hold the raw mail text.
#
# NOTE(review): the indentation of this function was lost and the regex line
# below is truncated in this copy, so the function does not parse as shown.
# It must be restored from the original script before use.
def get_url(message):

e = email.message_from_string(message['BODY[]']) # Build a Message object from the raw mail

html = ''

# Walk the message parts and decode each payload. Presumably the assignment
# below is the whole loop body, in which case html ends up holding the payload
# of the last part visited.

for part in e.walk():

html = part.get_payload(decode=True)

# Use a regular expression to extract the URL links from the HTML.
# NOTE(review): everything between the opening of the pattern literal and the
# trailing "y:" is missing on the next line (it looks as if the text was
# stripped as an HTML tag). The surviving statements suggest the original kept
# the longest match (y = len(x); url = x) - TODO confirm against the original.

pat = re.compile(r"y:

y = len(x)

url = x

return url

def get_yes():
    """Return yesterday's date as a ``datetime.date``."""
    return datetime.date.today() - datetime.timedelta(days=1)

def get_text(url, timeout=30):
    """Download the file at *url* and append selected columns to today's data file.

    The response body is written to ``./temp`` and read back as text. The
    first three lines are skipped. Every later line is split on tabs; for
    lines with at least four fields, the first field, the fourth field (with
    commas removed) and yesterday's date are joined with tabs and appended as
    one row to ``data/<today>``, where ``today`` is the module-level date
    string. NUL characters are stripped from each row before it is written.

    Lines with fewer than four fields are skipped; previously a line with two
    or three fields raised IndexError on the fourth-field lookup.

    Args:
        url: Address of the tab-separated file to download.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            times out.
        IOError: If ``./temp`` or the output file cannot be opened (the
            ``data/`` directory must already exist).
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=timeout)

    # Save the downloaded file content locally.
    with open("./temp", "wb") as raw_file:
        raw_file.write(response.content)

    with open("./temp", "r") as text_file:
        lines = text_file.readlines()

    yesterday = str(get_yes())
    rows = []
    for line in lines[3:]:
        ls = line.strip('\n').split('\t')
        if len(ls) < 4:
            continue
        res = ls[0] + '\t' + ls[3].replace(",", "") + '\t' + yesterday
        rows.append(res.replace('\0', "") + '\n')

    # Open the output once, and only when there is something to append, so an
    # empty download does not create an empty data file.
    if rows:
        with open('data/' + today, "a") as f_res:
            f_res.writelines(rows)

# Run the mail-processing job only when executed as a script, not on import.
if __name__ == '__main__':
    func()

代码很简单,不过需要提前在邮箱设置中开启 IMAP 服务,否则客户端无法通过 IMAP 协议登录。

你可能感兴趣的:(python,读取邮件内容)