python3-读取邮件内容

 背景:

        供货商给公司发服务器时,会通过邮件把单子发过来,需要把这些单子自动录入到系统里。所以第一步是把邮件正文读取出来,然后再把内容自动录入系统。

详细代码如下:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
# File       : getemail.py
# Time       :2022/8/11 15:43
# version    :python 3.6
# Description: Fetch the newest message from a POP3 mailbox and extract its headers, plain-text body and date.
"""
import calendar
import poplib
import re
from email.header import decode_header
from email.parser import BytesParser, Parser
from email.utils import parseaddr, parsedate_to_datetime

# Enter the e-mail address, password and POP3 server address:


class Email:
    """Download the newest message of a POP3 mailbox and expose its parts.

    The constructor logs in, retrieves the most recent message, parses it and
    closes the connection again. The helper methods then extract the
    From/To/Subject headers, the plain-text body and the sending date.

    Attributes:
        server: The poplib connection used for the download (closed once the
            constructor returns).
        msg_content: Raw message as text. Bytes that are not valid UTF-8 are
            shown as U+FFFD instead of aborting the download.
        msg: The parsed top-level message object.
        body: Text found by the most recent ``get_body_info`` call.
        header_dict: Headers collected by ``get_header_info``.
    """

    def __init__(self, email, password, pop3_server='pop3.mxhichina.com',
                 *, port=None, use_ssl=False):
        """Log in and fetch the newest message.

        Args:
            email: Account name used for the POP3 ``USER`` command.
            password: Password used for the POP3 ``PASS`` command.
            pop3_server: Host name of the POP3 server.
            port: TCP port; ``None`` selects the poplib default for the
                chosen transport.
            use_ssl: Connect with ``poplib.POP3_SSL`` instead of plain POP3.

        Raises:
            poplib.error_proto: On a server error response, or when the
                mailbox contains no message.
        """
        if use_ssl:
            if port is None:
                port = poplib.POP3_SSL_PORT
            self.server = poplib.POP3_SSL(pop3_server, port)
        else:
            if port is None:
                port = poplib.POP3_PORT
            self.server = poplib.POP3(pop3_server, port)

        try:
            # Authenticate.
            self.server.user(email)
            self.server.pass_(password)

            # list() returns one entry per message, e.g. [b'1 82923', b'2 2184', ...].
            _, mails, _ = self.server.list()
            if not mails:
                # Same exception type the server's rejection of RETR 0 would produce.
                raise poplib.error_proto('-ERR mailbox is empty, no message to retrieve')

            # Message numbers start at 1, so the newest one is len(mails).
            _, lines, _ = self.server.retr(len(mails))
        finally:
            # Always release the connection, even when login or retrieval failed.
            try:
                self.server.quit()
            except (poplib.error_proto, OSError):
                self.server.close()

        raw = b'\r\n'.join(lines)
        # Keep a readable copy of the raw text; never fail on non-UTF-8 bytes.
        self.msg_content = raw.decode('utf-8', errors='replace')
        # Parse from bytes so that 8-bit bodies survive get_payload(decode=True).
        self.msg = BytesParser().parsebytes(raw)
        self.body = ''
        self.header_dict = {}

    def get_email_time(self):
        """Return the date of the ``Date`` header as ``'YYYY-MM-DD'``.

        The header is read from ``msg_content`` and the date is reported as
        written in the header (no time-zone conversion).

        Raises:
            ValueError: If the header is missing or cannot be parsed.
        """
        headers = Parser().parsestr(self.msg_content, headersonly=True)
        date_value = headers.get('Date')
        if not date_value:
            raise ValueError('message has no Date header')
        try:
            sent = parsedate_to_datetime(str(date_value))
        except (TypeError, ValueError) as err:
            # Older Python versions raise TypeError for unparseable dates.
            raise ValueError('unparseable Date header: %r' % (date_value,)) from err
        return sent.date().isoformat()

    def get_header_info(self, msg, indent=0):
        """Collect the From, To and Subject headers of ``msg``.

        Args:
            msg: Message object to read the headers from.
            indent: Nesting level; headers are only read at level 0.

        Returns:
            ``self.header_dict`` with the keys 'From', 'To' and 'Subject'
            (a missing header maps to ''), or ``None`` when ``indent`` is not 0.
            Address headers are rendered as ``'name <address>'``.
        """
        if indent != 0:
            return None
        for header in ('From', 'To', 'Subject'):
            value = msg.get(header, '')
            if value:
                if not isinstance(value, str):
                    # Raw 8-bit headers arrive as Header objects; flatten to text first.
                    value = self.decode_str(value)
                if header == 'Subject':
                    value = self.decode_str(value)
                else:
                    hdr, addr = parseaddr(value)
                    value = '%s <%s>' % (self.decode_str(hdr), addr)
            self.header_dict[header] = value
        return self.header_dict

    def get_body_info(self, msg, indent=0):
        """Return the decoded text/plain body of ``msg``.

        Multipart messages are searched recursively. Parts marked as
        attachments are ignored; if several inline text/plain parts exist the
        last one wins. ``self.msg`` is left untouched.

        Args:
            msg: Message (or sub-part) to search.
            indent: Recursion depth; callers pass the default 0.

        Returns:
            The body text, or '' when no text/plain part was found.
        """
        if indent == 0:
            # Do not leak the result of an earlier call into this one.
            self.body = ''
        if msg.is_multipart():
            for part in msg.get_payload():
                self.get_body_info(part, indent=indent + 1)
        elif (msg.get_content_type() == 'text/plain'
              and msg.get_content_disposition() != 'attachment'):
            content = msg.get_payload(decode=True) or b''
            charset = self.guess_charset(msg) or 'utf-8'
            try:
                self.body = content.decode(charset, errors='replace')
            except LookupError:
                # Charset name unknown to Python: fall back to UTF-8.
                self.body = content.decode('utf-8', errors='replace')
        return self.body

    @staticmethod
    def decode_str(s):
        """Decode an RFC 2047 header value into plain text.

        Every fragment of the header is decoded and the results are joined,
        so values mixing plain and encoded words come back complete.
        """
        pieces = []
        for value, charset in decode_header(s):
            if isinstance(value, bytes):
                # decode_header hands un-encoded fragments back as
                # raw-unicode-escape bytes with charset None.
                codec = charset or 'raw-unicode-escape'
                try:
                    value = value.decode(codec, errors='replace')
                except LookupError:
                    # Unknown or unsupported charset label.
                    value = value.decode('utf-8', errors='replace')
            pieces.append(value)
        return ''.join(pieces)

    @staticmethod
    def guess_charset(msg):
        """Return the charset name declared for ``msg``, or ``None``.

        An explicitly attached charset takes priority; otherwise the
        ``charset`` parameter of the Content-Type header is used, lower-cased
        and without quotes or trailing parameters.
        """
        charset = msg.get_charset()
        if charset is not None:
            return str(charset)
        return msg.get_content_charset()


if __name__ == '__main__':
    # Demo run: replace the placeholder address and password with a real
    # mailbox before executing; the constructor connects to the POP3 server.
    ema = Email('user@example.com', 'xxxxxx')
    headers = ema.get_header_info(msg=ema.msg)
    body = ema.get_body_info(msg=ema.msg)
    sent_date = ema.get_email_time()
    print(headers)
    print(body)
    print(sent_date)


运行结果:

{'From': '王xx <xxx@xxx.com>', 'To': '王xx <xxx@xxx.com>', 'Subject': '测试'}
表格01    表格02    表格03    表格04    表格05    表格06    表格07 
    1    2    3    4    5    6    7 
    2    3    4    5    6    7    8 
    3    4    5    6    7    8    9 
    4    5    6    7    8    9    10 
    5    6    7    8    9    10    11


2022-08-12

你可能感兴趣的:(成长快乐,python,开发语言)