# Pandas中如何批量拆分excel以及合并excel_14

import numpy as np
import pandas as pd
import os
"""
本节主要介绍使用pandas批量拆分excel和合并excel
"""

# Working folder that holds the source workbook; the per-user split files
# are written to the "splits" sub-folder below it.
work_dir = './excel_split_merge'
split_dir = f'{work_dir}/splits'

# makedirs (unlike mkdir) also creates a missing parent folder, and
# exist_ok=True makes the call a no-op when the folder is already there,
# so no separate existence check is needed.
os.makedirs(split_dir, exist_ok=True)

# 0. Load the source workbook into a pandas DataFrame
source_path = f'{work_dir}/excel_source.xlsx'
df_source = pd.read_excel(source_path)

# Quick look at the data: first rows, the index, and the (rows, cols) shape
for preview in (df_source.head(), df_source.index, df_source.shape):
    print(preview)

# Number of data rows; used below to size the per-user chunks
total_row_count = len(df_source)

# 1. Split the large workbook into one smaller workbook per user
#    a. slice the source DataFrame positionally with df.iloc
#    b. save every slice with DataFrame.to_excel

# The users who each receive one share of the source rows
user_names = ['A','B','C','D','E','F']

# Balanced split: every user gets `split_size` rows and the first
# `remainder` users get one extra row. Rounding the chunk size up instead
# can leave the last users with no rows at all (e.g. 7 rows for 6 users
# would give 2,2,2,1,0,0), whereas this gives 2,1,1,1,1,1.
split_size, remainder = divmod(total_row_count, len(user_names))

# Build the list of (index, user name, sub-DataFrame) triples
df_subs = []
begin = 0
for idx, user_name in enumerate(user_names):
    # iloc end position (exclusive) for this user's slice
    end = begin + split_size + (1 if idx < remainder else 0)
    df_subs.append((idx, user_name, df_source.iloc[begin:end]))
    # The next slice starts where this one stopped
    begin = end

# Write every sub-DataFrame to its own workbook: zkl_<index>_<user>.xlsx
for idx, user_name, df_sub in df_subs:
    file_path = f'{split_dir}/zkl_{idx}_{user_name}.xlsx'
    df_sub.to_excel(file_path, index=False)

# 2. Merge the small per-user workbooks back into one large workbook
#    a. scan the split folder for the workbooks to merge
#    b. read each one into a DataFrame and tag it with its source user
#    c. combine all DataFrames with pd.concat
#    d. write the merged DataFrame to a workbook

# a. Collect (index, username, file name) for every split workbook.
# Only names shaped like zkl_<index>_<username>.xlsx are accepted, which
# skips Excel lock files ("~$...xlsx") and any unrelated files in the folder.
split_files = []
for excel_name in os.listdir(split_dir):
    stem, ext = os.path.splitext(excel_name)
    # maxsplit=2 keeps underscores that are part of the username itself
    parts = stem.split('_', 2)
    if ext != '.xlsx' or len(parts) != 3:
        continue
    if parts[0] != 'zkl' or not parts[1].isdecimal():
        continue
    split_files.append((int(parts[1]), parts[2], excel_name))

# os.listdir returns entries in arbitrary order; sort by the numeric index
# so the merged rows come back in the order they were split.
split_files.sort()
excel_names = [name for _, _, name in split_files]

# b. Read each workbook and add a column recording which user it came from
df_list = []
for _, username, excel_name in split_files:
    excel_path = f'{split_dir}/{excel_name}'
    df_split = pd.read_excel(excel_path)
    # Username is taken from the parsed file name (e.g. zkl_0_A.xlsx -> 'A');
    # slicing the name at fixed offsets produced '_A' and broke for index >= 10.
    df_split['username'] = username
    df_list.append(df_split)

# c. Concatenate all per-user DataFrames
df_merge = pd.concat(df_list)
print(df_merge.shape)
print(df_merge.head())
print(df_merge['username'].value_counts())

# d. Write the merged DataFrame to a workbook
df_merge.to_excel(f'{work_dir}/zkl_merge.xlsx',index=False)

# 你可能感兴趣的:(Pandas,python)