Python+Pandas实现批量合并Excel文件

import pandas as pd
from tkinter import filedialog, Tk
import time
import math
import os

root = Tk()  # 创建一个Tkinter.Tk()实例
files_dir = filedialog.askdirectory(initialdir='D:\\', title="请选择要批处理的文件夹")
root.destroy()  # 将Tkinter.Tk()实例销毁

# 开始计时
time_start = time.time()

# 找到所有文件及文件夹名称,可能包含非excel文件或目录,置入列表list_filenames_all[]
list_filenames_all = os.listdir(files_dir)

# 将仅包含xlsx的文件名称置入列表list_filenames_xlsx[]
list_filenames_xlsx = []
for filesname in list_filenames_all:
    if ".xlsx" in filesname:
        list_filenames_xlsx.append(filesname)

print('--------------------------------------------------------------------------------')
print(f'“{files_dir}/”文件夹下,共有Excel文件{len(list_filenames_xlsx)}个,具体如下:')
print('--------------------------------------------------------------------------------')
for filenames_xlsx in list_filenames_xlsx:
    print(filenames_xlsx)
print('--------------------------------------------------------------------------------')

# 获取文件大小,将文件的累计大小存入list_filesize列表,之所以用累计大小,是为了统计进度。
totalsize = 0
list_filesize = []
for filenames_xlsx in list_filenames_xlsx:
    stat_info = os.stat(files_dir + "/" + filenames_xlsx)
    totalsize = totalsize + stat_info.st_size
    list_filesize.append(totalsize)
print(
    f'文件总体大小:{round(totalsize / 1024 / 1024, 2)}MB,预计处理时间约{round(totalsize / 1024 / 1024 * 3.5 / 60, 0)}分钟')
print('--------------------------------------------------------------------------------')
print('合并进行中,请稍候……')

# 创建一个新目录存放 excel 文件
output_dir = files_dir + '/合并excel'
output_path = output_dir + '/' + "批量合并.xlsx"  # 构造绝对路径
if not os.path.exists(output_dir):  # 如果目录不存在就创建
    os.makedirs(output_dir)

df_merge = pd.DataFrame()
for i in range(0, len(list_filenames_xlsx)):
    df = pd.read_excel(files_dir + '/' + list_filenames_xlsx[i])  # 打开文件
    df_merge = pd.concat([df_merge, df])  # 将df合并至df_merge
    print(f'合并完成:{list_filenames_xlsx[i]}')
print('--------------------------------------------------------------------------------')
print(f'正在写入数据……')
writer = pd.ExcelWriter(output_path)
df_merge.to_excel(writer, sheet_name='Sheet1', startrow=0, index=False, header=True, encoding="utf-8")
print(f'正在保存表格……')
writer.save()

# 计时结束
time_end = time.time()
if (time_end - time_start) < 60:
    seconds = round((time_end - time_start), 0)
    minuits = 0
seconds = round((time_end - time_start) % 60, 0)
minuits = math.floor((time_end - time_start) / 60)
print('--------------------------------------------------------------------------------')
print(f'导出完成!!导出目录至{output_dir}/')
print(f'实际运行时间:{minuits}分 {seconds} 秒')
print('--------------------------------------------------------------------------------')

你可能感兴趣的:(Python,Pandas,python,经验分享)