本文简要说明使用 openpyxl 和 pd.ExcelFile 两种方法读取 Excel 文件。openpyxl 以只读模式打开工作簿后,使用完毕需调用 close() 关闭文件;pd.ExcelFile 可以不显式关闭,但推荐配合 with 语句使用或在用完后调用 close(),以便及时释放文件。
import numpy as np
import openpyxl
import pandas as pd
# Open the release-form ("放行单") Excel workbook in read-only mode; the
# original author notes that this can improve running speed.
fxd_Data = openpyxl.load_workbook(filename=filename_fxd, read_only=True)
# Names of every sheet in the workbook.
fxd_sheetnames = fxd_Data.sheetnames
# Only the first sheet is processed.
first_sheet_name = fxd_sheetnames[0]
fxd_sheet0 = fxd_Data[first_sheet_name]
# Sheet extent: the largest row and column numbers reported by openpyxl.
fxd_maxrows, fxd_maxcol = fxd_sheet0.max_row, fxd_sheet0.max_column
# Scan column A for the two marker cells that bracket the shipping-list data.
# cell.row is the 1-based Excel row number of the matching cell.
first_column_rows = fxd_sheet0.iter_rows(min_row=1, max_row=fxd_maxrows, min_col=1, max_col=1)
for column_cells in first_column_rows:
    for marker_cell in column_cells:
        marker_text = marker_cell.value
        if marker_text == "1.检验产品清单:":
            # The row right after the "inspected product list" title.
            data_row_start = marker_cell.row + 1
        elif marker_text == "买方质量代表":
            # NOTE(review): the original comment says "row number - 2" but the
            # code subtracts 3 — confirm which offset is intended.
            data_row_end = marker_cell.row - 3
# Find where the data area stops: the first row in [data_row_start,
# data_row_end) whose column-B cell is empty. nan_value_flag is the row just
# before that empty cell.
# Default for the case where no empty cell exists in the range, so that
# nan_value_flag is always bound (equivalent to the first empty cell being at
# data_row_end).
nan_value_flag = data_row_end - 1
for i in range(data_row_start, data_row_end):
    if fxd_sheet0.cell(i, 2).value is None:
        nan_value_flag = i - 1
        break
# The workbook was opened with read_only=True, which keeps the file open until
# close() is called; the sheet is not accessed after this point.
fxd_Data.close()
# Read the data area of the first sheet into a DataFrame.
# The sheet is selected with read_excel's ``sheet_name`` keyword.
# data_row_start is a 1-based openpyxl row number, while ``header`` is 0-based,
# hence the "- 1". Column A becomes the index, and only columns A:G and the
# rows up to the first blank row are loaded.
fxd_sheet0_Data = pd.read_excel(filename_fxd, sheet_name=fxd_sheetnames[0],
                                header=data_row_start - 1, index_col=0, usecols="A:G",
                                nrows=(nan_value_flag - data_row_start))
# Split every "物料描述" (material description) value on the backslash.
# Per the original note the parts are: name | drawing number | material |
# surface-treatment process.
# The column values are iterated directly so the result does not depend on the
# index labels (taken from column A) being exactly 1..n.
fxd_wuliao_list = [wuliao_text.split("\\") for wuliao_text in fxd_sheet0_Data["物料描述"]]
import pandas as pd
# Open the release-form ("放行单") Excel file. ExcelFile is used as a context
# manager so the underlying file is closed as soon as the first sheet has been
# parsed; only the resulting DataFrame is used afterwards.
with pd.ExcelFile(filename_fxd) as fxd_Data:
    # Names of every sheet in the workbook.
    fxd_sheetnames = fxd_Data.sheet_names
    # Load the first sheet into a DataFrame.
    fxd_Data_sheet0 = fxd_Data.parse(sheet_name=fxd_sheetnames[0])
# Extract the release-inspection date from the cell at iloc[2, 4].
# The text is expected to contain a colon followed by "<year>年<month>月<day>日";
# the result is formatted as "YYYY.MM.DD" with zero padding.
fxd_time_org = fxd_Data_sheet0.iloc[2, 4]
try:
    # Accept either an ASCII colon or a full-width colon before the date.
    index_time0 = fxd_time_org.find(":")
    if index_time0 < 0:
        # str.index raises ValueError when the full-width colon is absent too.
        index_time0 = fxd_time_org.index(":")
    # Each marker is searched for after the previous one.
    index_time_Y = fxd_time_org.index("年", index_time0)
    index_time_M = fxd_time_org.index("月", index_time_Y)
    index_time_D = fxd_time_org.index("日", index_time_M)
    # Strip embedded spaces and left-pad with zeros.
    fxd_time_Y = fxd_time_org[(index_time0 + 1):index_time_Y].replace(' ', '').zfill(4)
    fxd_time_M = fxd_time_org[(index_time_Y + 1):index_time_M].replace(' ', '').zfill(2)
    fxd_time_D = fxd_time_org[(index_time_M + 1):index_time_D].replace(' ', '').zfill(2)
    fxd_time = '.'.join([fxd_time_Y, fxd_time_M, fxd_time_D])
except (AttributeError, TypeError, ValueError):
    # Non-string cell (e.g. NaN) or a missing marker: report the failure and
    # fall back to NaN instead of aborting.
    self.show_critical_message("错误", "检验日期读取失败")
    fxd_time = np.nan
# Total number of rows and columns in the parsed sheet.
fxd_maxrows, fxd_maxcols = fxd_Data_sheet0.shape
# Scan every cell for the two marker texts that bracket the shipping-list
# data area. The positions are 0-based DataFrame row indices.
for row_idx in range(fxd_maxrows):
    for col_idx in range(fxd_maxcols):
        cell_text = str(fxd_Data_sheet0.iloc[row_idx, col_idx])
        if "检验产品清单" in cell_text:
            # Row right after the "inspected product list" title.
            data_row_start = row_idx + 1
        if "买方质量代表" in cell_text:
            # Two rows above the "buyer quality representative" row.
            data_row_end = row_idx - 2
# Find where the data area stops: the first row in [data_row_start,
# data_row_end) whose third column (iloc column 2) is null.
# Default to data_row_end so nan_value_flag is always bound, even when no null
# cell exists in the range.
nan_value_flag = data_row_end
for i in range(data_row_start, data_row_end):
    if pd.isnull(fxd_Data_sheet0.iloc[i, 2]):
        nan_value_flag = i
        break
# The row at data_row_start holds the column titles of the data area.
header_row = fxd_Data_sheet0.iloc[data_row_start, :]
# Take the rows between the title row and the first null row, renumbering
# them from 0.
fxd_Data_sheet0_df = fxd_Data_sheet0.iloc[(data_row_start + 1):nan_value_flag, :].reset_index(drop=True)
# Use the title row as the column labels of the data area.
fxd_Data_sheet0_df.columns = header_row
# Split every "物料描述" (material description) value on the backslash.
# Per the original note the parts are: name | drawing number | material |
# surface-treatment process.
fxd_wuliao_list = []
fxd_wuliao_list_col = 5  # each entry is padded with NaN up to this many parts
fxd_wuliao_list_row = nan_value_flag - data_row_start - 1  # number of data rows
for i in range(fxd_wuliao_list_row):
    wuliao_value = fxd_Data_sheet0_df["物料描述"][i]
    # A null description yields an all-NaN entry instead of failing on
    # .split(), mirroring the null handling of the batch-number column.
    wuliao_parts = [] if pd.isnull(wuliao_value) else wuliao_value.split("\\")
    # Pad short entries; a non-positive multiplier adds nothing.
    wuliao_parts.extend([np.nan] * (fxd_wuliao_list_col - len(wuliao_parts)))
    fxd_wuliao_list.append(wuliao_parts)
# Build fxd_pici_list with one entry per data row, taken from the
# "序列号/批次号" (serial number / batch number) column.
fxd_pici_list = []
for i in range(fxd_wuliao_list_row):
    pici_value = fxd_Data_sheet0_df["序列号/批次号"][i]
    if not pd.isnull(pici_value):
        # Non-empty cells are split on the backslash.
        fxd_pici_list.append(fxd_Data_sheet0_df["序列号/批次号"][i].split("\\"))
    else:
        # Empty cells are represented by a two-NaN placeholder.
        fxd_pici_list.append([np.nan, np.nan])