需要引入logger模块
import os.path
import time
import pandas as pd
from util import logger
LOGGER = logger.MyLogger("excelUtils").getLogger()
class ExcelReader:
    """Read Excel data from a single file or from every matching file in a directory.

    Attributes are plain and may be reassigned directly:
        path: File or directory to read from.
        file_suffix: Suffix used to select files when ``path`` is a directory.
        sort_by: Column name(s) passed to ``DataFrame.sort_values``; falsy disables sorting.
    """

    def __init__(self, path, file_suffix=".xls", sort_by=None):
        self.path = path
        self.file_suffix = file_suffix
        self.sort_by = sort_by

    def setPath(self, path, file_suffix=".xls", sort_by=None):
        """Re-target the reader; ``file_suffix`` and ``sort_by`` are reset to the given values."""
        self.__init__(path, file_suffix=file_suffix, sort_by=sort_by)

    def getData(self, sheet_name=0, skiprows=0, skipfooter=0, to_records=False):
        """Load Excel content from ``self.path``.

        When ``self.path`` is a file it is read directly. When it is a
        directory, every entry whose name ends with ``self.file_suffix`` is
        read and the results are concatenated (original row indexes are kept).

        :param sheet_name: forwarded to ``pandas.read_excel``.
        :param skiprows: forwarded to ``pandas.read_excel``.
        :param skipfooter: forwarded to ``pandas.read_excel``.
        :param to_records: when true, return ``data.to_dict(orient="records")``
            instead of the DataFrame.
        :return: a DataFrame, or a list of row dicts when ``to_records`` is true.
            An empty DataFrame is returned when a directory has no matching files.
        :raises FileNotFoundError: if ``self.path`` does not exist.
        """
        if not os.path.exists(self.path):
            raise FileNotFoundError("Excel path does not exist: {}".format(self.path))

        read_options = {
            "sheet_name": sheet_name,
            "skiprows": skiprows,
            "skipfooter": skipfooter,
        }
        data = pd.DataFrame()
        if os.path.isfile(self.path):
            data = pd.read_excel(self.path, **read_options)
            LOGGER.info("读取Excel文件完毕,共读取1个文件")
        elif os.path.isdir(self.path):
            # os.listdir order is arbitrary; sort so the combined row order is reproducible.
            xls_names = sorted(
                name for name in os.listdir(self.path) if name.endswith(self.file_suffix)
            )
            frames = [
                pd.read_excel(os.path.join(self.path, name), **read_options)
                for name in xls_names
            ]
            # DataFrame.append was removed in pandas 2.0 and re-copied the
            # accumulated frame on every call; concatenate once instead.
            # pd.concat rejects an empty list, so keep the empty frame then.
            if frames:
                data = pd.concat(frames, sort=False)
            LOGGER.info("读取Excel文件完毕,共读取" + str(len(xls_names)) + "个文件")

        # A frame with no rows has nothing to order, and a column-less empty
        # frame would raise KeyError on sort_values.
        if self.sort_by and not data.empty:
            data.sort_values(by=self.sort_by, inplace=True)

        if to_records:
            return data.to_dict(orient="records")
        return data
class ExcelSaver:
    """Write tabular data to an Excel workbook inside a target directory.

    Attributes are plain and may be reassigned directly:
        to_path: Directory the workbook is written into (created if missing).
        file_suffix: Extension appended to the file name.
        file_name: Base file name; when falsy a ``YYYYmmddHHMMSS`` local
            timestamp is generated at save time.
    """

    def __init__(self, to_path, file_suffix=".xlsx", file_name=None):
        self.to_path = to_path
        self.file_suffix = file_suffix
        self.file_name = file_name

    def saveDictArray(self, data):
        """Save a list of dicts as an Excel sheet.

        :param data: list of dicts, converted with ``DataFrame.from_records``.
            Nothing is written when ``data`` is empty or ``None``.
        :return: None
        """
        if not data:
            return
        self._writeFrame(pd.DataFrame.from_records(data))

    def saveDataFrame(self, data_frame):
        """Save a DataFrame as an Excel sheet.

        :param data_frame: frame to write. Nothing is written when it is
            ``None`` or has no rows/columns.
        :return: None
        """
        # A DataFrame has no unambiguous truth value (``if data_frame:`` raises
        # ValueError), so test for None / emptiness explicitly.
        if data_frame is None or data_frame.empty:
            return
        self._writeFrame(data_frame)

    def _writeFrame(self, data_frame):
        """Write ``data_frame`` without its index to the path from ``combinePath``."""
        path = self.combinePath()
        # The context manager saves and closes the workbook, even on error;
        # ExcelWriter.save() no longer exists in pandas 2.x.
        with pd.ExcelWriter(path, engine="openpyxl") as writer:
            data_frame.to_excel(excel_writer=writer, index=False)

    def combinePath(self):
        """Ensure ``to_path`` exists and return the full output file path.

        :return: ``to_path`` joined with ``file_name`` (or a local-time
            timestamp when ``file_name`` is falsy) plus ``file_suffix``.
        """
        os.makedirs(self.to_path, exist_ok=True)
        file_name = self.file_name or time.strftime("%Y%m%d%H%M%S", time.localtime())
        return os.path.join(self.to_path, file_name + self.file_suffix)
使用:
from util import excelUtil
读取excel:
excelReader = excelUtil.ExcelReader(path="")
输入path为文件时直接读取指定文件,path要包含文件名,返回dataFrame
输入path为路径时,查找路径下后缀为file_suffix的文件,读取所有内容并整合返回dataFrame
excelReader.getData(sheet_name = 0,skiprows = 0,skipfooter = 0,to_records = True)
其中:
sheet_name为指定sheet页的读取,可传int表示第几个,也可传sheet具体名称
skiprows:跳过的行,0为全读取,1表示从顶部算起跳过第一行,以此类推
skipfooter:跳过的尾,0表示全读取,1表示从底部算起跳过最后一行,以此类推
to_records:是否转换为list(dict)格式返回,默认返回dataframe数据
保存excel:
excelSaver = excelUtil.ExcelSaver(to_path = "",file_name = "")
excelSaver.saveDictArray(data)
传入数据需要是一个数组/list,内部是一个个数据结构相同的dict,会自动读key值为excel列名
to_path为指定保存到路径,file_name为文件名,缺省时用当前时间生成