pandas模块实现excel文件的批量读取与保存工具

操作excel的工具类

需要引入logger模块

import os.path
import time

import pandas as pd

from util import logger

# Module-level logger used by ExcelReader to report how many files were read.
# It is obtained from the project's own logger helper under the name "excelUtils".
LOGGER = logger.MyLogger("excelUtils").getLogger()


class ExcelReader:
    """
    Read Excel data from a single file or from every matching file in a directory.

    When ``path`` is a file it is read directly.  When ``path`` is a directory,
    every entry whose name ends with ``file_suffix`` is read and the results are
    concatenated into one DataFrame.
    """

    def __init__(self, path, file_suffix=".xls", sort_by=None):
        """
        :param path: Excel file, or directory containing Excel files.
        :param file_suffix: file-name suffix used to select files in directory mode.
        :param sort_by: optional column name (or list of names) to sort the result by.
        """
        self.path = path
        self.file_suffix = file_suffix
        self.sort_by = sort_by

    def setPath(self, path, file_suffix=".xls", sort_by=None):
        """
        Re-point this reader at another file or directory.

        ``file_suffix`` and ``sort_by`` are reset to the given values (or their
        defaults), exactly as if a new reader had been constructed.
        """
        # Assign directly instead of re-invoking __init__ on a live instance.
        self.path = path
        self.file_suffix = file_suffix
        self.sort_by = sort_by

    def getData(self, sheet_name=0, skiprows=0, skipfooter=0, to_records=False):
        """
        Load the Excel data found at ``self.path``.

        :param sheet_name: sheet selector forwarded to ``pandas.read_excel``.
        :param skiprows: rows to skip at the top, forwarded to ``pandas.read_excel``.
        :param skipfooter: rows to skip at the bottom, forwarded to ``pandas.read_excel``.
        :param to_records: when true, return ``list[dict]`` (one dict per row)
            instead of a DataFrame.
        :return: a DataFrame, or a list of row dicts when ``to_records`` is true.
            An empty DataFrame / empty list is returned when a directory holds
            no matching files.
        :raises FileNotFoundError: if ``self.path`` does not exist.
        """
        if not os.path.exists(self.path):
            raise FileNotFoundError("Excel path does not exist: {}".format(self.path))

        read_options = {"sheet_name": sheet_name, "skiprows": skiprows, "skipfooter": skipfooter}
        data = pd.DataFrame()
        if os.path.isfile(self.path):
            data = pd.read_excel(self.path, **read_options)
            LOGGER.info("读取Excel文件完毕,共读取1个文件")
        elif os.path.isdir(self.path):
            # os.listdir order is arbitrary; sort so the concatenation order is deterministic.
            xls_names = sorted(x for x in os.listdir(self.path) if x.endswith(self.file_suffix))
            frames = [pd.read_excel(os.path.join(self.path, xls_name), **read_options)
                      for xls_name in xls_names]
            # DataFrame.append was removed in pandas 2.0 and copied the accumulated
            # frame on every call; a single concat is the supported replacement.
            # pd.concat rejects an empty list, so keep the empty frame in that case.
            if frames:
                data = pd.concat(frames, sort=False)
            LOGGER.info("读取Excel文件完毕,共读取{}个文件".format(len(xls_names)))

        # An empty frame has no columns, so sorting it by name would raise KeyError.
        if self.sort_by and not data.empty:
            data = data.sort_values(by=self.sort_by)
        if to_records:
            return data.to_dict(orient="records")
        return data


class ExcelSaver:
    """
    Save tabular data to an Excel file inside a target directory.

    The output file is ``<to_path>/<file_name><file_suffix>``; when no
    ``file_name`` is configured a local-time timestamp (``YYYYmmddHHMMSS``)
    is used instead.
    """

    def __init__(self, to_path, file_suffix=".xlsx", file_name=None):
        """
        :param to_path: directory the file is written to (created on demand).
        :param file_suffix: extension appended to the file name.
        :param file_name: file name without extension; defaults to a timestamp.
        """
        self.to_path = to_path
        self.file_suffix = file_suffix
        self.file_name = file_name

    def saveDictArray(self, data):
        """
        Save a list of dicts as an Excel sheet.

        :param data: list of dicts; nothing is written when it is empty or None.
        :return: None
        """
        if data:
            self._writeFrame(pd.DataFrame.from_records(data))

    def saveDataFrame(self, data_frame):
        """
        Save a DataFrame as an Excel sheet.

        :param data_frame: DataFrame to write; nothing is written when it is
            None or empty.
        :return: None
        """
        # ``if data_frame:`` raises ValueError for a DataFrame (its truth value is
        # ambiguous), so emptiness has to be checked explicitly.
        if data_frame is not None and not data_frame.empty:
            self._writeFrame(data_frame)

    def _writeFrame(self, data_frame):
        """Write ``data_frame`` without its index to the path from ``combinePath``."""
        path = self.combinePath()
        # The context manager saves and closes the workbook even if to_excel
        # raises; ExcelWriter.save() no longer exists in pandas 2.x.
        with pd.ExcelWriter(path, engine="openpyxl") as writer:
            data_frame.to_excel(excel_writer=writer, index=False)

    def combinePath(self):
        """
        Build the output file path, creating the target directory if needed.

        :return: ``to_path`` joined with the file name (configured name or a
            local-time timestamp) plus ``file_suffix``.
        """
        # An empty to_path means "current directory"; os.makedirs("") would raise.
        if self.to_path:
            # exist_ok avoids the check-then-create race of a separate exists() test.
            os.makedirs(self.to_path, exist_ok=True)
        file_name = self.file_name or time.strftime("%Y%m%d%H%M%S", time.localtime())
        return os.path.join(self.to_path, file_name + self.file_suffix)

使用:

from util import excelUtil

读取excel:

excelReader = excelUtil.ExcelReader(path="")

输入path为文件时直接读取指定文件,path要包含文件名,返回dataFrame

输入path为路径时,查找路径下后缀为file_suffix的文件,读取所有内容并整合返回dataFrame

excelReader.getData(sheet_name = 0,skiprows = 0,skipfooter = 0,to_records = True)

​ 其中:

​ sheet_name为指定sheet页的读取,可传int表示第几个,也可传sheet具体名称

​ skiprows:跳过的行,0为全读取,1表示从顶部算起跳过第一行,以此类推

skipfooter:跳过的尾,0表示全读取,1表示从底部算起跳过最后一行,以此类推

​ to_records:是否转换为list(dict)格式返回,默认返回dataframe数据

保存excel:

excelSaver = excelUtil.ExcelSaver(to_path = "",file_name = "")

excelSaver.saveDictArray(data)

传入数据需要是一个数组/list,内部是一个个数据结构相同的dict,会自动读key值为excel列名

to_path为指定保存到路径,file_name为文件名,缺省时用当前时间生成

你可能感兴趣的:(python)