**前言:** 昨天接到这个任务后,我搜索并阅读了很多资料和博客,这些资料的链接放在本文末尾。
下面以网页邮箱(163邮箱)为例。
#-*- encoding: utf-8 -*-
import email,sys
from imapclient import IMAPClient
from bs4 import BeautifulSoup
from email.header import decode_header, make_header
from pathlib import Path

# IMAP server address
hostname = 'imap.163.com'
# User name, i.e. the mailbox account
username = '邮箱账号'
# Authorization code (this is not the original mailbox password)
passwd = '你的授权码'
# Directory that receives one "<uid>.txt" file per message.
# A raw string keeps the backslash literal instead of relying on the
# invalid escape sequences "\I" and "\{".
output_dir = Path(r'D:\IMAP')


def decode_mime_header(raw_value):
    """Return a MIME-encoded header value as readable text.

    Args:
        raw_value: The header value as returned by ``message[name]``;
            ``None`` when the message has no such header.

    Returns:
        The decoded header as ``str``; an empty string when the header
        is missing.
    """
    if raw_value is None:
        return ''
    return str(make_header(decode_header(raw_value)))


def decode_payload(payload, declared_charset=None):
    """Decode a message body from bytes to ``str``.

    The charset declared by the message part is tried first. UTF-8 comes
    before GB18030 in the fallbacks because UTF-8 decoding is strict,
    whereas a GBK-family codec accepts many UTF-8 byte sequences and
    silently yields mojibake.

    Args:
        payload: The transfer-decoded body bytes.
        declared_charset: Charset named in the part's Content-Type
            header, or ``None`` when absent.

    Returns:
        The decoded text. If every candidate charset fails, undecodable
        bytes are replaced rather than aborting the whole run.
    """
    candidates = ['utf-8', 'gb18030']
    if declared_charset:
        candidates.insert(0, declared_charset)
    for charset in candidates:
        try:
            return payload.decode(charset)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the message declared a charset Python does not know.
            continue
    return payload.decode('utf-8', errors='replace')


def extract_text_body(message):
    """Return the decoded text body of a parsed e-mail message.

    Every part of the message is visited, including parts nested inside
    inner multipart containers. Parts marked as attachments are skipped.
    When several text parts exist the last one wins, so the HTML
    alternative is returned when it follows the plain-text one.

    Args:
        message: An ``email.message.Message`` instance.

    Returns:
        The body text with surrounding whitespace removed, or an empty
        string when the message has no text part.
    """
    body = ''
    for part in message.walk():
        if part.get_content_maintype() != 'text':
            continue
        if part.get_content_disposition() == 'attachment':
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        body = decode_payload(payload.strip(), part.get_content_charset())
    return body


def html_to_text(markup):
    """Strip HTML tags from ``markup`` and remove blank-line pairs."""
    plain = BeautifulSoup(markup, 'html.parser').get_text().strip()
    return plain.replace('\n\n', '')


def main():
    """Save every message in INBOX as a text file under ``output_dir``.

    Exits with status 1 when login or folder selection fails.
    """
    # The context manager logs out on every exit path, including sys.exit().
    with IMAPClient(hostname, ssl=True) as server:
        try:
            server.login(username, passwd)
            # Send the client identity information to the server
            server.id_({"name": "IMAPClient", "version": "2.1.0"})
            # Open the inbox read-only; server.list_folders() lists the
            # other mailboxes (drafts, sent, ...) if they are needed.
            server.select_folder('INBOX', readonly=True)
        except server.Error:
            print('Could not login')
            sys.exit(1)

        output_dir.mkdir(parents=True, exist_ok=True)

        for uid in server.search():
            # One round trip fetches both the raw message and its envelope.
            fetched = server.fetch(uid, ['BODY[]', 'ENVELOPE'])[uid]
            # The raw message arrives as bytes, so parse it as bytes.
            message = email.message_from_bytes(fetched[b'BODY[]'])

            subject = decode_mime_header(message['Subject'])
            mail_from = decode_mime_header(message['From'])
            dates = fetched[b'ENVELOPE'].date
            body_text = html_to_text(extract_text_body(message))

            # gb18030 can encode every Unicode character, so writing never fails.
            with open(output_dir / f'{uid}.txt', 'w', encoding='gb18030') as f:
                f.write(f'From:{mail_from}\n')
                f.write(f'Subject:{subject}\n')
                f.write(f'Date:{dates}\n')
                f.write('正文内容:\n')
                f.write(body_text + '\n')


if __name__ == '__main__':
    main()
#-*- encoding: utf-8 -*-
import email,sys
from imapclient import IMAPClient
from bs4 import BeautifulSoup
from email.header import decode_header, make_header
from pathlib import Path

# IMAP server address
hostname = 'imap.163.com'
# User name, i.e. the mailbox account
username = '邮箱账号'
# Authorization code (this is not the original mailbox password)
passwd = '你的授权码'
# Directory that receives one "<uid>.txt" file per message.
# A raw string keeps the backslash literal instead of relying on the
# invalid escape sequences "\I" and "\{".
output_dir = Path(r'D:\IMAP')


def decode_mime_header(raw_value):
    """Return a MIME-encoded header value as readable text.

    Args:
        raw_value: The header value as returned by ``message[name]``;
            ``None`` when the message has no such header.

    Returns:
        The decoded header as ``str``; an empty string when the header
        is missing.
    """
    if raw_value is None:
        return ''
    return str(make_header(decode_header(raw_value)))


def decode_payload(payload, declared_charset=None):
    """Decode a message body from bytes to ``str``.

    The charset declared by the message part is tried first. UTF-8 comes
    before GB18030 in the fallbacks because UTF-8 decoding is strict,
    whereas a GBK-family codec accepts many UTF-8 byte sequences and
    silently yields mojibake.

    Args:
        payload: The transfer-decoded body bytes.
        declared_charset: Charset named in the part's Content-Type
            header, or ``None`` when absent.

    Returns:
        The decoded text. If every candidate charset fails, undecodable
        bytes are replaced rather than aborting the whole run.
    """
    candidates = ['utf-8', 'gb18030']
    if declared_charset:
        candidates.insert(0, declared_charset)
    for charset in candidates:
        try:
            return payload.decode(charset)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the message declared a charset Python does not know.
            continue
    return payload.decode('utf-8', errors='replace')


def extract_text_body(message):
    """Return the decoded text body of a parsed e-mail message.

    Every part of the message is visited, including parts nested inside
    inner multipart containers. Parts marked as attachments are skipped.
    When several text parts exist the last one wins, so the HTML
    alternative is returned when it follows the plain-text one.

    Args:
        message: An ``email.message.Message`` instance.

    Returns:
        The body text with surrounding whitespace removed, or an empty
        string when the message has no text part.
    """
    body = ''
    for part in message.walk():
        if part.get_content_maintype() != 'text':
            continue
        if part.get_content_disposition() == 'attachment':
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        body = decode_payload(payload.strip(), part.get_content_charset())
    return body


def html_to_text(markup):
    """Strip HTML tags from ``markup`` and remove blank-line pairs."""
    plain = BeautifulSoup(markup, 'html.parser').get_text().strip()
    return plain.replace('\n\n', '')


def main():
    """Save every message in INBOX as a text file under ``output_dir``.

    Exits with status 1 when login or folder selection fails.
    """
    # The context manager logs out on every exit path, including sys.exit(),
    # so no explicit server.logout() call is needed at the end.
    with IMAPClient(hostname, ssl=True) as server:
        try:
            server.login(username, passwd)
            # Send the client identity information to the server
            server.id_({"name": "IMAPClient", "version": "2.1.0"})
            # Open the inbox read-only; server.list_folders() lists the
            # other mailboxes (drafts, sent, ...) if they are needed.
            server.select_folder('INBOX', readonly=True)
        except server.Error:
            print('Could not login')
            sys.exit(1)

        output_dir.mkdir(parents=True, exist_ok=True)

        for uid in server.search():
            # One round trip fetches both the raw message and its envelope.
            fetched = server.fetch(uid, ['BODY[]', 'ENVELOPE'])[uid]
            # Parse the bytes directly; converting to str and re-parsing
            # can corrupt non-ASCII content.
            message = email.message_from_bytes(fetched[b'BODY[]'])

            subject = decode_mime_header(message['Subject'])
            mail_from = decode_mime_header(message['From'])
            dates = fetched[b'ENVELOPE'].date
            body_text = html_to_text(extract_text_body(message))

            # gb18030 can encode every Unicode character, so writing never fails.
            with open(output_dir / f'{uid}.txt', 'w', encoding='gb18030') as f:
                f.write(f'From:{mail_from}\n')
                f.write(f'Subject:{subject}\n')
                f.write(f'Date:{dates}\n')
                f.write('正文内容:\n')
                f.write(body_text + '\n')


if __name__ == '__main__':
    main()
参考资料:
1、Python实现IMAP协议接收和解析邮件内容
2、什么是IMAP协议
3、python_imap收取邮件 + 邮件内容解析
4、Python IMAP/POP3收取并解析邮件
5、html文本写入txt