需求:从几百个 CSV 或 XLS 文件中读取某些重要数据,并汇总到一个单独的 Excel 文件中进行数据分析。
Python实现:
# coding:utf-8
# File Name: csv_data_sort
# Description :
# Author : micro
# Date: 2019/9/17
import glob, os
import csv
import xlrd, xlwt
from xlutils.copy import copy
def run(source_dir=r'C:\Users\micro\Desktop\2018流量',
        output_path=r"C:\Users\micro\Desktop\2018.xls",
        sites=("www.baidu.com",) * 6):
    """Collect one value per tracked site from each CSV and append them to a workbook.

    Every ``*.csv`` file directly inside ``source_dir`` is scanned. For each
    entry in ``sites`` the third column of the last row whose first column
    equals that site is taken (the original comment describes this value as
    the site's UV). Each CSV contributes one output row, with one cell per
    entry of ``sites`` in the same order. All rows are then appended to the
    existing workbook at ``output_path`` via ``write_excel_xls_append``.

    Args:
        source_dir: Folder that holds the CSV files.
        output_path: Existing .xls workbook that receives the rows.
        sites: Site names to look up in the first CSV column. The default
            mirrors the original script, which repeated the same placeholder
            name six times.

    A site that does not appear in a file yields an empty string for that
    file, rather than raising ``NameError`` or reusing the value found in a
    previously processed file. Rows with fewer than three columns are
    skipped because they have no third column to read.
    """
    sites = tuple(sites)
    wanted = set(sites)

    # Step 1: find every CSV in the folder. glob gives no ordering guarantee,
    # so sort to make the order of the output rows reproducible.
    csv_paths = sorted(glob.glob(os.path.join(source_dir, "*.csv")))

    # Step 2: scan each file and pick the value for every tracked site.
    all_result = []
    for csv_path in csv_paths:
        latest = {}  # site name -> value from its last matching row in this file
        # newline="" is what the csv module expects for files it reads; the
        # context manager closes the handle even if parsing fails.
        with open(csv_path, "r", newline="") as csv_file:
            for row in csv.reader(csv_file):
                if len(row) > 2 and row[0] in wanted:
                    latest[row[0]] = row[2]
        all_result.append([latest.get(site, "") for site in sites])

    write_excel_xls_append(output_path, all_result)
def write_excel_xls_append(path, value):
    """Append rows below the existing data in the first sheet of an .xls workbook.

    Args:
        path: Path of an existing workbook; it is read and then overwritten.
        value: Sequence of rows, each row a sequence of cell values.

    The workbook is opened with xlrd, converted to a writable copy with
    ``xlutils.copy.copy``, extended, and saved back to the same path.
    """
    source_book = xlrd.open_workbook(path)

    # The first sheet's current row count is the first free row index.
    first_sheet_name = source_book.sheet_names()[0]
    first_free_row = source_book.sheet_by_name(first_sheet_name).nrows

    # xlrd objects are read-only, so write into the copied workbook instead.
    writable_book = copy(source_book)
    target_sheet = writable_book.get_sheet(0)

    for row_offset, row in enumerate(value):
        for col, cell in enumerate(row):
            target_sheet.write(row_offset + first_free_row, col, cell)

    writable_book.save(path)
    print("xls格式表格【追加】写入数据成功!")
# Run the aggregation only when executed as a script, not when imported.
if __name__ == "__main__":
    run()