Message-ID: 23936138.1075855378263.JavaMail.evans@thyme
Date: Wed, 9 May 2001 17:13:00 -0700 (PDT)
From: [email protected]
To: [email protected]
Subject: Re:
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
X-From: Phillip K Allen
X-To: John J Lavorato <John J Lavorato/ENRON@enronXgate@ENRON>
X-cc:
X-bcc:
X-Folder: \Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Sent Mail
X-Origin: Allen-P
X-FileName: pallen (Non-Privileged).pst
The west desk would like 2 analysts.
from email.parser import Parser
# Parse one e-mail file and print its sender, recipient, date, subject and body.

# Path of the single e-mail file to parse.
file_to_read = 'D:\\安然数据集\\maildir\\allen-p\\_sent_mail\\5'

# Decode as latin-1 instead of the platform default: latin-1 maps every byte
# to a character, so a stray 8-bit byte in a mail file cannot raise
# UnicodeDecodeError, and plain ASCII text is decoded unchanged.
with open(file_to_read, "r", encoding="latin-1") as f:
    data = f.read()

# parsestr() builds a Message object from the raw text; its headers can be
# looked up like dictionary keys (case-insensitively).
message = Parser().parsestr(data)

# Sender
print("\n From: " , message['from'])
# Recipient
print("\nTo: " , message['to'])
# Date
print("\n Date:" , message['Date'])
# Subject
print("\n Subject: " , message['subject'])
# Body text
print("\n \n Body: " , message.get_payload())
运行结果与邮件内容相符。
批量提取的实现方式与单个邮件内容的提取相同。
import os
from email.parser import Parser
# Root directory that is walked recursively to find the e-mail files to parse.
path = "D:\\安然数据集\\maildir\\allen-p\\contacts"
def email_analyse(inputfile, to_email_list, from_email_list, email_body, date_list=None):
    """Parse one e-mail file and append its fields to the given lists.

    Args:
        inputfile: Path of the e-mail file to read.
        to_email_list: List that receives the ``To`` header value.
        from_email_list: List that receives the ``From`` header value.
        email_body: List that receives the message payload.
        date_list: List that receives the ``Date`` header value. When
            omitted, the module-level ``email_date`` list is used, which
            keeps existing four-argument callers working.

    A header that is absent from the message is appended as ``None``.
    """
    # Decode as latin-1 instead of the platform default: latin-1 maps every
    # byte to a character, so a stray 8-bit byte in a mail file cannot raise
    # UnicodeDecodeError, and plain ASCII text is decoded unchanged.
    with open(inputfile, "r", encoding="latin-1") as f:
        data = f.read()
    message = Parser().parsestr(data)

    if date_list is None:
        # Legacy behaviour: dates go to the module-level accumulator.
        date_list = email_date

    # Store the extracted values in their corresponding lists.
    to_email_list.append(message['to'])
    from_email_list.append(message['from'])
    date_list.append(message['date'])
    email_body.append(message.get_payload())
# Accumulators for the extracted fields. email_analyse() is called below
# without a date list, so it appends dates to the module-level email_date;
# that name must therefore exist before the walk starts.
to_email_list = []
from_email_list = []
email_date = []
email_body = []

# Visit every file below `path`, at any depth.
for directory, _subdirectories, filenames in os.walk(path):
    for filename in filenames:
        # os.path.join combines the directory and the file name into one path.
        email_analyse(os.path.join(directory, filename), to_email_list, from_email_list, email_body)


def _write_lines(filename, values):
    """Write every non-empty entry of *values* to *filename*, one per line."""
    # UTF-8 can encode any character, so writing never fails with
    # UnicodeEncodeError regardless of the platform's default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        for value in values:
            if value:  # skip None (header missing) and empty strings
                f.write(value)
                f.write("\n")


# Dump each list to its own text file.
_write_lines("to_email_list.txt", to_email_list)
_write_lines("from_email_list.txt", from_email_list)
_write_lines("email_body.txt", email_body)
_write_lines("email_date.txt", email_date)
可以将生成的 txt 文本与邮件内容进行对比，检查提取结果是否有误。