python读取文件列表,pandas操作excel

python读取文件列表,pandas操作excel

目标

  • 读取指定文件夹下所有excel文件
  • 根据excel文件一列的内容分条件新增列
  • 保存并输出新excel文件

思路

  • 读取文件利用os.walk()
  • 判断、新增列利用pandas
  • 输出新excel文件利用pandas的ExcelWriter()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Dec  3 13:27:49 2018

@author: yejishan
"""

import pandas as pd
import os


# Collect the names of the input workbooks.
def get_files(input_root):
    """Return the names of all ``.xls`` files found under *input_root*.

    The directory tree is walked recursively with ``os.walk``. Only the bare
    file name is recorded (not the directory it was found in), which is what
    the rest of the script expects.

    Args:
        input_root: Path of the directory to search.

    Returns:
        list[str]: File names whose extension is exactly ``.xls``. The list
        is empty when nothing matches or the directory does not exist.
    """
    file_list = []
    # Walk the directory given by the caller. The earlier version ignored
    # this parameter and read a module-level ``input_file`` instead.
    for _root, _dirs, files in os.walk(input_root):
        # splitext only treats a real ".xls" suffix as the extension, so a
        # file literally named "xls" is no longer picked up by mistake.
        file_list.extend(
            name for name in files if os.path.splitext(name)[1] == '.xls'
        )

    return file_list


# Build the "code" column for one workbook and write the result.

# Maps a value of the ``res`` column to ``(suffix, truncate)``: the suffix is
# appended to the row's ``trigger`` text, and ``truncate`` says whether only
# the first two characters of that text are kept before appending.
_CODE_RULES = {
    'k01': ('01', False),
    1: ('10', False),
    2: ('20', False),
    'k03': ('03', False),
    3: ('30', False),
    74: ('74', True),
    75: ('75', True),
}


def _build_code(trigger_value, suffix, truncate):
    """Return ``int(str(trigger_value) + suffix)``, optionally truncating."""
    prefix = str(trigger_value)
    if truncate:
        prefix = prefix[:2]
    return int(prefix + suffix)


def add_code(file):
    """Add a ``code`` column to one workbook and save it as ``code_<file>``.

    The workbook is read from the module-level ``input_file`` directory and
    the result is written to the module-level ``output_file`` directory, on a
    sheet named ``Sheet1`` without the index column.

    For every row, the value in the ``res`` column selects a rule from
    ``_CODE_RULES``; the code is the ``trigger`` value (as text, possibly cut
    to two characters) with the rule's suffix appended, converted to ``int``.
    Rows whose ``res`` value matches no rule keep ``None``.

    Args:
        file: File name of the workbook inside ``input_file``.

    Raises:
        KeyError: If the ``res`` column is missing, or ``trigger`` is missing
            on a row that needs it.
        ValueError: If the assembled text is not a valid integer.
    """
    data = pd.read_excel(os.path.join(input_file, file))
    res = data['res']

    codes = [None] * len(res)
    for i, res_value in enumerate(res):
        rule = _CODE_RULES.get(res_value)
        if rule is not None:
            # ``trigger`` is only read for rows that actually match a rule.
            codes[i] = _build_code(data['trigger'][i], *rule)
        print(file + "'s " + str(i) + ' rows is finished.')

    # Assign the whole column at once instead of one ``.loc`` write per row;
    # dtype=object keeps the ints and ``None`` exactly as they are.
    data['code'] = pd.Series(codes, index=data.index, dtype=object)

    # NOTE(review): the output keeps the input's ".xls" name. pandas 2.0
    # dropped its xlwt-based .xls writer, so confirm the target environment
    # can still write that format.
    out_path = os.path.join(output_file, 'code_' + file)
    # ExcelWriter.save() was removed in pandas 2.0; the context manager saves
    # and closes the workbook, including when to_excel raises.
    with pd.ExcelWriter(out_path) as writer:
        data.to_excel(writer, sheet_name='Sheet1', index=False)
    print(file + ' is saved.')


# Script entry point.
if __name__ == "__main__":
    # Input and output folders; add_code() reads these module-level names.
    input_file = "输入文件路径"
    output_file = "输出路径"

    # Process every workbook that get_files() reports for the input folder.
    for file in get_files(input_file):
        add_code(file)

总结

  • 处理速度有点慢
  • 但内存占用不高
  • 利用DataFrame选取列时,data.loc[]很方便

你可能感兴趣的:(数据处理,python,pandas)