利用Python读取Excel表格并可视化

前言

最近学习数据分析,感觉Python做数据分析真的好用。
如果原始数据是PDF文件,可以先用下面这个网站把它转换为Excel:
https://www.ilovepdf.com/zh-cn/pdf_to_excel

正文

安装 xlrd 和 pyecharts
(注意:xlrd 2.0 及以上版本只支持 .xls 文件,不再支持 .xlsx;如果读取 .xlsx 时报错,可以改装 xlrd 1.2.0)

conda install xlrd
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pyecharts

一、准备数据

1、载入Excel 表格

# coding=utf8
import xlrd
import numpy as np
from pyecharts.charts import Bar
from pyecharts.charts import Pie, Grid
from pyecharts import options as opts

# ==================== Prepare data ====================
# Path of the Excel workbook to read (relative to the working directory).
workbook_path = "机电学院2020年硕士研究生复试拟录取结果公示.xlsx"
data = xlrd.open_workbook(workbook_path)
# Work on the first worksheet of the workbook.
table = data.sheet_by_index(0)

2、提取Excel 表格数据

# Filled by Read_Excel: one dict per candidate that passes its filter.
tables = []

# Record field -> 0-based column index in the sheet. The insertion order is
# the key order of every record dict (and therefore of its printed form).
_COLUMN_BY_FIELD = {
    "id": 0,
    "name": 1,
    "status": 8,
    "preliminary_score": 5,
    "retest_score": 6,
    "total_score": 7,
    "ranking": 10,
    "remarks": 3,
}


def Read_Excel(excel):
    """Append the admitted, full-time candidates of a sheet to ``tables``.

    Rows ``1 .. excel.nrows - 2`` (0-based) are read, i.e. the first and the
    last row of the sheet are skipped.
    NOTE(review): presumably those are a header row and a trailing non-data
    row -- confirm against the actual workbook layout.

    A row is kept only when its "status" cell equals "是" and its "remarks"
    cell equals "全日制"; every other row is dropped.

    Args:
        excel: Sheet-like object exposing ``nrows`` and
            ``cell_value(row, col)`` (for example an ``xlrd`` sheet).

    Returns:
        None. Matching records are appended to the module-level ``tables``.
    """
    for row in range(1, excel.nrows - 1):
        # Read from the sheet that was passed in, not from a global, so the
        # function works for any sheet handed to it.
        record = {
            field: excel.cell_value(row, col)
            for field, col in _COLUMN_BY_FIELD.items()
        }
        if record["status"] == "是" and record["remarks"] == "全日制":
            tables.append(record)

打印数据:

# Fill `tables` from the loaded sheet, then print every kept record,
# one per line.
Read_Excel(table)
for record in tables:
    print(record)

3、数据分段统计
根据自己的需求进行修改

# Score bands are the half-open intervals [300, 310), [310, 320), ...,
# [400, 410). Adjust these three constants to change the banding.
SCORE_BAND_START = 300
SCORE_BAND_WIDTH = 10
SCORE_BAND_COUNT = 11

# Sentinels: they keep these values when `tables` is empty.
min_score = 999
max_score = 0
band_counts = [0] * SCORE_BAND_COUNT

# Tally every record's preliminary score into its band and track the extremes.
for i in tables:
    score = i["preliminary_score"]
    if score > max_score:
        max_score = score
    if score < min_score:
        min_score = score

    # Floor division instead of `score in range(...)`: a range only contains
    # whole numbers, so a fractional score such as 356.5 would match no band
    # and be dropped silently. Scores outside 300-410 are still not counted.
    band = (score - SCORE_BAND_START) // SCORE_BAND_WIDTH
    if 0 <= band < SCORE_BAND_COUNT:
        band_counts[int(band)] += 1

# Two tuples in matching order (band labels, band counts) for the charts.
bar_x_axis_data = tuple(
    f"{low}-{low + SCORE_BAND_WIDTH}"
    for low in range(
        SCORE_BAND_START,
        SCORE_BAND_START + SCORE_BAND_WIDTH * SCORE_BAND_COUNT,
        SCORE_BAND_WIDTH,
    )
)
bar_y_axis_data = tuple(band_counts)

# Keep the original per-band counter names available for any later code
# that refers to them.
(num_score_300_310, num_score_310_320, num_score_320_330,
 num_score_330_340, num_score_340_350, num_score_350_360,
 num_score_360_370, num_score_370_380, num_score_380_390,
 num_score_390_400, num_score_400_410) = bar_y_axis_data

4、绘制可视化图形
有网站可以参考:https://pyecharts.org/#/zh-cn/intro
绘制柱状图:

# ===================== Bar chart =====================
# Build the chart step by step: band labels on the x axis, per-band counts
# as a single series.
bar_chart = Bar()
bar_chart.add_xaxis(bar_x_axis_data)
bar_chart.add_yaxis("录取考生", bar_y_axis_data, color="#af00ff")
bar_chart.set_global_opts(title_opts=opts.TitleOpts(title="数量"))
# Write the chart to an HTML file; `c` holds render()'s return value.
c = bar_chart.render("./录取数据图.html")

绘制饼状图:

# ====================== Pie chart ======================
pie_chart = Pie(init_opts=opts.InitOpts(height="800px", width="1200px"))

# Pair each band label with its count: [[label, count], ...].
pie_data = [
    [band_label, band_count]
    for band_label, band_count in zip(bar_x_axis_data, bar_y_axis_data)
]

# Rich-text styles referenced by the label formatter below.
label_rich_styles = {
    "b": {"fontSize": 16, "lineHeight": 33},
    "per": {
        "color": "#eee",
        "backgroundColor": "#334455",
        "padding": [2, 4],
        "borderRadius": 2,
    },
}
pie_label_opts = opts.LabelOpts(
    formatter="{b|{b}: }{c}  {per|{d}%}  ",
    rich=label_rich_styles,
)

pie_chart.add(
    "录取分数概览",
    pie_data,
    center=["35%", "38%"],
    radius="40%",
    label_opts=pie_label_opts,
)
pie_chart.set_global_opts(
    title_opts=opts.TitleOpts(title="录取", subtitle='Made by rengar'),
    legend_opts=opts.LegendOpts(pos_left="0%", pos_top="65%"),
)
# Write the chart to an HTML file; `c` holds render()'s return value.
c = pie_chart.render("./录取饼图.html")

运行后会在当前文件夹下生成两个 HTML 文件,用浏览器打开即可看到图表

利用Python读取Excel表格并可视化_第1张图片
利用Python读取Excel表格并可视化_第2张图片
中间可以根据自己需要,再进行修改代码
这个实验的所有代码、数据、结果都在我的码云里保存,下面是链接
https://gitee.com/rengarwang/Excel-visualization

你可能感兴趣的:(数据分析(Python),数据分析和可视化)