有时我们要针对一些数据做统计报告,在文件过多亦或数据量大,excel操作重复操作又过多等情况下,我们可以利用Python进行数据分析处理。
假设有文本数据如下:
201801136 61.0 68.0 60.0
201801137 94.0 64.0 75.0
201801138 88.0 77.0 82.0
201801139 87.0 84.0 72.0
201801140 93.0 69.0 74.0
我们利用python读取并将除了第一列学号外的数据,整理到numpy矩阵中,代码如下:
def read_data(file):
with open(file, "r") as f:
id, subject_1, subject_2, subject_3 =[], [], [], []
for line in f.readlines():
line = line.strip('\n') # 去掉列表中每一个元素的换行符
line = line.split(' ')
id.append((line[0]))
subject_1.append(float(line[1]))
subject_2.append(float(line[2]))
subject_3.append(float(line[3]))
row, col = len(subject_1), 3
table = np.zeros((row, col))
table[:, 0], table[:, 1], table[:, 2] = subject_1, subject_2, subject_3
# head = ['subject_1', 'subject_2', 'subject_3']
# data = pd.DataFrame(table)
# data.columns = head
return table
def write_excel(file_name):
workbook = xlsxwriter.Workbook(file_name)
# 在文件中创建一个名为TEST的sheet,不加名字默认为sheet1
worksheet = workbook.add_worksheet(u'Score')
worksheet.write(0, 0, '数学')
worksheet.write(1, 1, 521.1314)
workbook.close()
style = workbook.add_format({
"fg_color": "yellow", # 单元格的背景颜色
"bold": 1, # 字体加粗
"align": "center", # 对齐方式
"valign": "vcenter", # 字体对齐方式
"font_color": "red" # 字体颜色
})
# 在Excel的第r行第c列以style格式写入data[r][c]数据
worksheet.write(r, c, data[r][c], style)
worksheet.write_row('B1', head, style_more) # 写入表头
# 设置单元格宽度
width = 20
worksheet.set_column(0, col, width)
worksheet.write(row, col+1, '=SUM(B6:D6)')
# 设置数据格式为百分比
style_pre = workbook.add_format({'num_format': '0.000%'})
chart = workbook.add_chart({"type": "column"})
# column 柱状图
# area面积图
# bar 条形图
# line折现图
# radar雷达图
# 5为图表添加数据
chart.add_series(
{"name": "成绩", # 标题
"categories": "=Score!$b$1:$d$1", # 统计项名称 工作簿名称+数据
"values": "=Score!$b$2:$d$2", # 统计值 工作簿名称+数据
"line": {"color": "black", "bold": True} # 柱子边颜色
}
)
worksheet.insert_chart("A11", chart)
# 6) 插入本地图片(例如用matplotlib画的图)
x = np.linspace(0, np.pi)
y = np.sin(x)
plt.plot(x, y, color='red', marker='+')
plt.savefig('sin.png') # 保存图片
plt.show()
worksheet.insert_image('D16', 'sin.png')
# 冻结第一行和第一列
worksheet.freeze_panes(1, 1)
import os
import numpy as np
import pandas as pd
import xlrd
import xlwt
import xlsxwriter
import matplotlib.pyplot as plt
def read_data(file):
with open(file, "r") as f:
id, subject_1, subject_2, subject_3 =[], [], [], []
for line in f.readlines():
line = line.strip('\n') # 去掉列表中每一个元素的换行符
line = line.split(' ')
id.append((line[0]))
subject_1.append(float(line[1]))
subject_2.append(float(line[2]))
subject_3.append(float(line[3]))
row, col = len(subject_1), 3
table = np.zeros((row, col))
table[:, 0], table[:, 1], table[:, 2] = subject_1, subject_2, subject_3
# head = ['subject_1', 'subject_2', 'subject_3']
# data = pd.DataFrame(table)
# data.columns = head
return table
def write_excel_demo(file_name):
workbook = xlsxwriter.Workbook(file_name)
# 在文件中创建一个名为TEST的sheet,不加名字默认为sheet1
worksheet = workbook.add_worksheet(u'Test')
worksheet.write(0, 0, '数学')
workbook.close()
def write_excel(file_name, data):
workbook = xlsxwriter.Workbook(file_name)
# 1)设置单元格格式
style = workbook.add_format({
"align": "center", # 对齐方式
"valign": "vcenter", # 字体对齐方式
})
style_more = workbook.add_format({
"fg_color": "yellow", # 单元格的背景颜色
"bold": 1, # 字体加粗
"align": "center", # 对齐方式
"valign": "vcenter", # 字体对齐方式
"font_color": "red" # 字体颜色
})
# 2)设置数据格式为百分比
style_pre = workbook.add_format({'num_format': '0.000%'})
worksheet = workbook.add_worksheet(u'Score') # 在文件中创建一个名为Score的sheet,不加名字默认为sheet1
row, col = len(data), len(data[0, :])
width = 20
# 3) 设置单元格宽度
worksheet.set_column(0, col, width)
head = ['subject_1', 'subject_2', 'subject_3']
'''
for i in range(len(head)):
worksheet.write(0, i+1, head[i], style_more)
'''
worksheet.write_row('B1', head, style_more) # 写入表头
# 7) 冻结第一行和第一列
worksheet.freeze_panes(1, 1)
for i in range(row):
for j in range(col):
worksheet.write(i+1, j+1, data[i][j], style)
# 4) 调用Excel自身的公式
worksheet.write(row, col+1, '=SUM(B6:D6)')
# 5) 插入Excel图表
chart = workbook.add_chart({"type": "column"})
# column 柱状图
# area面积图
# bar 条形图
# line折现图
# radar雷达图
# 5为图表添加数据
chart.add_series(
{"name": "成绩", # 标题
"categories": "=Score!$b$1:$d$1", # 统计项名称 工作簿名称+数据
"values": "=Score!$b$2:$d$2", # 统计值 工作簿名称+数据
"line": {"color": "black", "bold": True} # 柱子边颜色
}
)
worksheet.insert_chart("A11", chart)
# 6) 插入本地图片(例如用matplotlib画的图)
x = np.linspace(0, np.pi)
y = np.sin(x)
plt.plot(x, y, color='red', marker='+')
plt.savefig('sin.png') # 保存图片
plt.show()
worksheet.insert_image('D16', 'sin.png')
workbook.close()
if __name__ == '__main__':
file_name = "test.xlsx"
write_excel_demo(file_name)
data = read_data("data/score.txt")
file_name = 'result.xlsx'
write_excel(file_name, data)
本文分节详细,一是为了后续查寻方便,二是后续在学习工作中也会持续总结,毕竟熟能生巧,万一一段时间不用,不熟练了也可以回来查找。以上代码和数据都可以去我的GitHub下载。