Python处理EML解析数据,统计及排序并使用pyecharts可视化柱状图输出

 

1. 针对之前 EML 解析出来的数据,对每个字段(例如发件人、收件人、抄送人、邮件主题)分别做出现次数的统计与排序,并以柱状图的形式可视化输出。

 

纯记录,我太菜了

# -*- coding:utf-8 -*-
from typing import Dict, Any, List

import openpyxl
from collections import Counter

# Raw cell values collected from columns 1-4 of the 'result' sheet.
# Further down, infos1..infos4 feed the 'From', 'To', 'Cc' and 'Subject'
# statistics sheets respectively.
infos1 = []
infos2 = []
infos3 = []
infos4 = []
# Source workbook produced by the earlier EML parsing step
destinefile = 'result_le.xlsx'

# Load the existing workbook
info_file = openpyxl.load_workbook(destinefile)
# Look up the data sheet; wb[name] is the supported replacement for the
# deprecated get_sheet_by_name()
info_sheet = info_file['result']
# Create one output sheet per analysed field
sheet2 = info_file.create_sheet('From')
sheet3 = info_file.create_sheet('To')
sheet4 = info_file.create_sheet('Cc')
sheet5 = info_file.create_sheet('Subject')
# Index of the last row that holds data
row_count = info_sheet.max_row
print(row_count)
# Read every row. Rows are 1-based and max_row is inclusive, so the upper
# bound must be row_count + 1; range(1, row_count) dropped the final row.
# NOTE(review): row 1 is read as data; if the sheet has a header row it will
# be counted as a value too - confirm against the producer of the workbook.
for row in range(1, row_count + 1):
    # Column indices start at 1
    infos1.append(info_sheet.cell(row, 1).value)
    infos2.append(info_sheet.cell(row, 2).value)
    infos3.append(info_sheet.cell(row, 3).value)
    infos4.append(info_sheet.cell(row, 4).value)

#频率统计
def all_list(arr):
    """Count how many times each distinct value occurs in *arr*.

    Args:
        arr: Iterable of hashable values (``None`` is counted like any
            other value).

    Returns:
        A plain ``dict`` mapping each distinct value to its number of
        occurrences. An empty input yields an empty dict.
    """
    # Counter tallies in a single pass; the previous arr.count() per distinct
    # value rescanned the whole list each time (quadratic on large inputs).
    return dict(Counter(arr))

def _occurrences(pair):
    """Return the count part of a (value, count) pair; used as sort key."""
    return pair[1]


# Frequency table (value -> count) for each analysed column.
from_list = all_list(infos1)
to_list = all_list(infos2)
cc_list = all_list(infos3)
subject_list = all_list(infos4)

# (value, count) pairs ordered from most to least frequent.
from_sorted_list = list(from_list.items())
from_sorted_list.sort(key=_occurrences, reverse=True)

to_sorted_list = list(to_list.items())
to_sorted_list.sort(key=_occurrences, reverse=True)

cc_sorted_list = list(cc_list.items())
cc_sorted_list.sort(key=_occurrences, reverse=True)

subject_sorted_list = list(subject_list.items())
subject_sorted_list.sort(key=_occurrences, reverse=True)

# Write each sorted (value, count) table to its sheet: value in column 1,
# count in column 2, one entry per row starting at row 1.
#
# enumerate(..., start=1) pairs the 1-based sheet row with every list entry.
# The previous range(1, len(...)) loop used the row number as the list index,
# so entry 0 - the most frequent value - was never written.
for row, entry in enumerate(from_sorted_list, start=1):
    # The 'From' label is derived from the text form of the whole
    # (value, count) pair: parentheses and single quotes are stripped and
    # everything from the first comma onwards is discarded.
    # NOTE(review): this differs from the other three sheets, which write
    # str(value) unchanged - confirm the truncation is intended.
    sheet2.cell(row, 1).value = str(entry).replace('(', '').replace('\'', '').replace(')', '').split(',')[0]
    sheet2.cell(row, 2).value = int(entry[1])

for row, entry in enumerate(to_sorted_list, start=1):
    sheet3.cell(row, 1).value = str(entry[0])
    sheet3.cell(row, 2).value = int(entry[1])

for row, entry in enumerate(cc_sorted_list, start=1):
    sheet4.cell(row, 1).value = str(entry[0])
    sheet4.cell(row, 2).value = int(entry[1])

for row, entry in enumerate(subject_sorted_list, start=1):
    sheet5.cell(row, 1).value = str(entry[0])
    sheet5.cell(row, 2).value = int(entry[1])


# Persist the original data plus the four statistics sheets to a new file
info_file.save('eml_analysis.xlsx')
# -*- coding:utf-8 -*-
import openpyxl
import pyecharts
from pyecharts.charts import Bar
from pyecharts import options as opts

# Visualisation: read the statistics sheets back in as chart data.

# Maximum number of rows charted per sheet (the original loop read rows 1-29).
TOP_N = 29


def _read_top_rows(sheet, last_row, limit=TOP_N):
    """Collect (labels, counts) from columns 1 and 2 of *sheet*.

    Reads rows 1..min(last_row, limit) and skips rows where both cells are
    empty, so a sheet with fewer than *limit* rows no longer pads the chart
    data with ``None`` entries.
    """
    labels = []
    counts = []
    for row in range(1, min(last_row, limit) + 1):
        label = sheet.cell(row, 1).value
        count = sheet.cell(row, 2).value
        if label is None and count is None:
            continue
        labels.append(label)
        counts.append(count)
    return labels, counts


# Open the workbook written by the statistics step
workbook = openpyxl.load_workbook('eml_analysis.xlsx')
# Look up the sheets; wb[name] is the supported replacement for the
# deprecated get_sheet_by_name()
From = workbook['From']
To = workbook['To']
Cc = workbook['Cc']
Subject = workbook['Subject']

# Index of the last populated row of each sheet
From_rows = From.max_row
To_rows = To.max_row
Cc_rows = Cc.max_row
Subject_rows = Subject.max_row

# Each sheet has two columns: label and count
From_cols = 2
To_cols = 2
Cc_cols = 2
Subject_cols = 2

# x data = labels (column 1), y data = counts (column 2)
xdata_From, ydata_From = _read_top_rows(From, From_rows)
xdata_To, ydata_To = _read_top_rows(To, To_rows)
xdata_Cc, ydata_Cc = _read_top_rows(Cc, Cc_rows)
xdata_Subject, ydata_Subject = _read_top_rows(Subject, Subject_rows)



def _render_bar(title, series_name, labels, values, html_path):
    """Build one bar chart, render it to *html_path*, and return the chart.

    The x axis uses a name rotation of 300 and tilts the category labels by
    60 degrees.
    """
    chart = Bar().set_global_opts(
        title_opts=opts.TitleOpts(title=title),
        xaxis_opts=opts.AxisOpts(name_rotate=300, axislabel_opts={"rotate": 60}),
    )
    chart.add_xaxis(labels)
    chart.add_yaxis(series_name, values)
    chart.render(html_path)
    return chart


# One chart per analysed field, each written to its own HTML file.
bar_From = _render_bar("From发件人群组分析", "EML_From_Analysis", xdata_From, ydata_From, "From.html")

bar_To = _render_bar("To收件人群组分析", "EML_To_Analysis", xdata_To, ydata_To, "To.html")

bar_Cc = _render_bar("Cc抄送人群组分析", "Cc_Number", xdata_Cc, ydata_Cc, "Cc.html")

bar_Subject = _render_bar("Subject邮件主题群组分析", "Subject_Number", xdata_Subject, ydata_Subject, "Subject.html")

print("绘图已完成!")

 

你可能感兴趣的:(可视化,python)