0062.467 KLcin Cont -0.2378 V [AWEM0]
0062.485 dffin city -0.5893 V [BWEM1]
0052.505 deslin test -0.4378 V [CWE]
0022.529 monday morning -0.5679 V [DME]
0032.547 every word -0.4938 V [ELS]
0042.564 Hello Word -0.2567 V [FRA1]
注释: 真实的txt内容不便展示, 以上仅象征性地展示txt文件内容的格式
import pandas as pd
import re
import openpyxl
from openpyxl.utils import get_column_letter
import glob
# read_txt()用来读取单个txt文件
def read_txt(DC_P, FUNC_P, files):
FUNC_Item = []
FBC_Set = []
file = open(files, encoding='utf-8')
for line in file:
# 匹配Lot_ID, Wafer_ID, Die_X, Die_Y的值
if re.match(r'.*(B\w\d+_\w\d|A\w\d+_\w\d|DEBUG_\w\d).*W.{2}\s*(\d+)\s*Die.*X.(\d+)\s*Y.(\d+).', line, re.I):
Lot_Wafer_Die = re.match(r'.*(B\w\d+_\w\d|A\w\d+_\w\d|DEBUG_\w\d).*W.{2}\s*(\d+)\s*Die.*X.(\d+)\s*Y.(\d+).', line, re.I)
Lot_ID = Lot_Wafer_Die.group(1)
Wafer_ID = Lot_Wafer_Die.group(2)
Die_X = Lot_Wafer_Die.group(3)
Die_Y = Lot_Wafer_Die.group(4)
DC_P.setdefault('Lot_ID', []).append(Lot_ID)
DC_P.setdefault('Wafer_ID', []).append(Wafer_ID)
DC_P.setdefault('Die_X', []).append(Die_X)
DC_P.setdefault('Die_Y', []).append(Die_Y)
# 匹配Bin的值
elif re.match(r'\s*First Fail (Bin)\s*.\s*(\d+)', line, re.I):
Bin_Sort = re.match(r'\s*First Fail (Bin)\s*.\s*(\d+)', line, re.I)
DC_P.setdefault(Bin_Sort.group(1), []).append(Bin_Sort.group(2))
# 匹配Isb DVDD_MEMO的值
elif re.match(r'.*(Isb DVDD_MEM0)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD_MEM0 = re.match(r'.*(Isb DVDD_MEM0)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD_MEM0.group(1) + '_' + Isb_DVDD_MEM0.group(3), []).append(Isb_DVDD_MEM0.group(2))
# 匹配Isb DVDD_MEMO_LS的值
elif re.match(r'.*(Isb DVDD_MEM0_LS)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD_MEM0_LS = re.match(r'.*(Isb DVDD_MEM0_LS)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD_MEM0_LS.group(1) + '_' + Isb_DVDD_MEM0_LS.group(3), []).append(Isb_DVDD_MEM0_LS.group(2))
# 匹配Isb DVDD_MEMO_SD的值
elif re.match(r'.*(Isb DVDD_MEM0_SD)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD_MEM0_SD = re.match(r'.*(Isb DVDD_MEM0_SD)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD_MEM0_SD.group(1) + '_' + Isb_DVDD_MEM0_SD.group(3), []).append(Isb_DVDD_MEM0_SD.group(2))
# 匹配Isb DVDD_MEMO_DS的值
elif re.match(r'.*(Isb DVDD_MEM0_DS)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD_MEM0_DS = re.match(r'.*(Isb DVDD_MEM0_DS)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD_MEM0_DS.group(1) + '_' + Isb_DVDD_MEM0_DS.group(3), []).append(Isb_DVDD_MEM0_DS.group(2))
# 匹配Isb DVDD_MEM1的值
elif re.match(r'.*(Isb DVDD_MEM1)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD_MEM1 = re.match(r'.*(Isb DVDD_MEM1)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD_MEM1.group(1) + '_' + Isb_DVDD_MEM1.group(3), []).append(Isb_DVDD_MEM1.group(2))
# 匹配Isb DVDD_MEM1_LS的值
elif re.match(r'.*(Isb DVDD_MEM1_LS)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD_MEM1_LS = re.match(r'.*(Isb DVDD_MEM1_LS)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD_MEM1_LS.group(1) + '_' + Isb_DVDD_MEM1_LS.group(3), []).append(Isb_DVDD_MEM1_LS.group(2))
# 匹配Isb DVDD_MEM1_SD的值
elif re.match(r'.*(Isb DVDD_MEM1_SD)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD_MEM1_SD = re.match(r'.*(Isb DVDD_MEM1_SD)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD_MEM1_SD.group(1) + '_' + Isb_DVDD_MEM1_SD.group(3), []).append(Isb_DVDD_MEM1_SD.group(2))
# 匹配Isb DVDD_MEM1_DS的值
elif re.match(r'.*(Isb DVDD_MEM1_DS)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD_MEM1_DS = re.match(r'.*(Isb DVDD_MEM1_DS)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD_MEM1_DS.group(1) + '_' + Isb_DVDD_MEM1_DS.group(3), []).append(Isb_DVDD_MEM1_DS.group(2))
# 匹配Isb DVDD_MEM2的值
elif re.match(r'.*(Isb DVDD_MEM2)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD_MEM2 = re.match(r'.*(Isb DVDD_MEM2)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD_MEM2.group(1) + '_' + Isb_DVDD_MEM2.group(3), []).append(Isb_DVDD_MEM2.group(2))
# 匹配Isb DVDD_MEM2_LS的值
elif re.match(r'.*(Isb DVDD_MEM2_LS)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD_MEM2_LS = re.match(r'.*(Isb DVDD_MEM2_LS)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD_MEM2_LS.group(1) + '_' + Isb_DVDD_MEM2_LS.group(3), []).append(Isb_DVDD_MEM2_LS.group(2))
# 匹配Isb DVDD_MEM2_SD的值
elif re.match(r'.*(Isb DVDD_MEM2_SD)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD_MEM2_SD = re.match(r'.*(Isb DVDD_MEM2_SD)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD_MEM2_SD.group(1) + '_' + Isb_DVDD_MEM2_SD.group(3), []).append(Isb_DVDD_MEM2_SD.group(2))
# 匹配Isb DVDD_MEM2_DS的值
elif re.match(r'.*(Isb DVDD_MEM2_DS)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD_MEM2_DS = re.match(r'.*(Isb DVDD_MEM2_DS)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD_MEM2_DS.group(1) + '_' + Isb_DVDD_MEM2_DS.group(3), []).append(Isb_DVDD_MEM2_DS.group(2))
# 匹配Isb DVDD18的值
elif re.match(r'.*(Isb DVDD18)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD18 = re.match(r'.*(Isb DVDD18)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD18.group(1) + '_' + Isb_DVDD18.group(3), []).append(Isb_DVDD18.group(2))
# 匹配Isb DVDD18_LS的值
elif re.match(r'.*(Isb DVDD18_LS)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD18_LS = re.match(r'.*(Isb DVDD18_LS)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD18_LS.group(1) + '_' + Isb_DVDD18_LS.group(3), []).append(Isb_DVDD18_LS.group(2))
# 匹配Isb DVDD18_SD的值
elif re.match(r'.*(Isb DVDD18_SD)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD18_SD = re.match(r'.*(Isb DVDD18_SD)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD18_SD.group(1) + '_' + Isb_DVDD18_SD.group(3), []).append(Isb_DVDD18_SD.group(2))
# 匹配Isb DVDD18_DS的值
elif re.match(r'.*(Isb DVDD18_DS)\s+(-?[\d.]+)\s*(\w+)', line, re.I):
Isb_DVDD18_DS = re.match(r'.*(Isb DVDD18_DS)\s+(-?[\d.]+)\s*(\w+)', line, re.I)
DC_P.setdefault(Isb_DVDD18_DS.group(1) + '_' + Isb_DVDD18_DS.group(3), []).append(Isb_DVDD18_DS.group(2))
# 匹配Static_IR_MEM1_TEST_0.90V的值
elif re.match(r'.*(Static_IR_MEM1_TEST_0.90V)\s+(-?[\d.]+)\s*\w+', line, re.I):
Static_IR_MEM1_TEST_H = re.match(r'.*(Static_IR_MEM1_TEST_0.90V)\s+(-?[\d.]+)\s*\w+', line, re.I)
DC_P.setdefault(Static_IR_MEM1_TEST_H.group(1), []).append(Static_IR_MEM1_TEST_H.group(2))
# 匹配Static_IR_MEM1_TEST_0.75V的值
elif re.match(r'.*(Static_IR_MEM1_TEST_0.75V)\s+(-?[\d.]+)\s*\w+', line, re.I):
Static_IR_MEM1_TEST_M = re.match(r'.*(Static_IR_MEM1_TEST_0.75V)\s+(-?[\d.]+)\s*\w+', line, re.I)
DC_P.setdefault(Static_IR_MEM1_TEST_M.group(1), []).append(Static_IR_MEM1_TEST_M.group(2))
# 匹配Static_IR_MEM1_TEST_0.60V的值
elif re.match(r'.*(Static_IR_MEM1_TEST_0.60V)\s+(-?[\d.]+)\s*\w+', line, re.I):
Static_IR_MEM1_TEST_L = re.match(r'.*(Static_IR_MEM1_TEST_0.60V)\s+(-?[\d.]+)\s*\w+', line, re.I)
DC_P.setdefault(Static_IR_MEM1_TEST_L.group(1), []).append(Static_IR_MEM1_TEST_L.group(2))
# 匹配Static_IR_MEM2_TEST_0.90V的值
elif re.match(r'.*(Static_IR_MEM2_TEST_0.90V)\s+(-?[\d.]+)\s*\w+', line, re.I):
Static_IR_MEM2_TEST_H = re.match(r'.*(Static_IR_MEM2_TEST_0.90V)\s+(-?[\d.]+)\s*\w+', line, re.I)
DC_P.setdefault(Static_IR_MEM2_TEST_H.group(1), []).append(Static_IR_MEM2_TEST_H.group(2))
# 匹配Static_IR_MEM2_TEST_0.75V的值
elif re.match(r'.*(Static_IR_MEM2_TEST_0.75V)\s+(-?[\d.]+)\s*\w+', line, re.I):
Static_IR_MEM2_TEST_M = re.match(r'.*(Static_IR_MEM2_TEST_0.75V)\s+(-?[\d.]+)\s*\w+', line, re.I)
DC_P.setdefault(Static_IR_MEM2_TEST_M.group(1), []).append(Static_IR_MEM2_TEST_M.group(2))
# 匹配Static_IR_MEM2_TEST_0.60V的值
elif re.match(r'.*(Static_IR_MEM2_TEST_0.60V)\s+(-?[\d.]+)\s*\w+', line, re.I):
Static_IR_MEM2_TEST_L = re.match(r'.*(Static_IR_MEM2_TEST_0.60V)\s+(-?[\d.]+)\s*\w+', line, re.I)
DC_P.setdefault(Static_IR_MEM2_TEST_L.group(1), []).append(Static_IR_MEM2_TEST_L.group(2))
# 匹配Dynamic_IR_MEM1_TEST_0.90V的值
elif re.match(r'.*(Dynamic_IR_MEM1_TEST_0.90V)\s+(-?[\d.]+)\s*\w+', line, re.I):
Dynamic_IR_MEM1_TEST_H = re.match(r'.*(Dynamic_IR_MEM1_TEST_0.90V)\s+(-?[\d.]+)\s*\w+', line, re.I)
DC_P.setdefault(Dynamic_IR_MEM1_TEST_H.group(1), []).append(Dynamic_IR_MEM1_TEST_H.group(2))
# 匹配Dynamic_IR_MEM1_TEST_0.75V的值
elif re.match(r'.*(Dynamic_IR_MEM1_TEST_0.75V)\s+(-?[\d.]+)\s*\w+', line, re.I):
Dynamic_IR_MEM1_TEST_M = re.match(r'.*(Dynamic_IR_MEM1_TEST_0.75V)\s+(-?[\d.]+)\s*\w+', line, re.I)
DC_P.setdefault(Dynamic_IR_MEM1_TEST_M.group(1), []).append(Dynamic_IR_MEM1_TEST_M.group(2))
# 匹配Dynamic_IR_MEM1_TEST_0.60V的值
elif re.match(r'.*(Dynamic_IR_MEM1_TEST_0.60V)\s+(-?[\d.]+)\s*\w+', line, re.I):
Dynamic_IR_MEM1_TEST_L = re.match(r'.*(Dynamic_IR_MEM1_TEST_0.60V)\s+(-?[\d.]+)\s*\w+', line, re.I)
DC_P.setdefault(Dynamic_IR_MEM1_TEST_L.group(1), []).append(Dynamic_IR_MEM1_TEST_L.group(2))
# 匹配Dynamic_IR_MEM2_TEST_0.90V的值
elif re.match(r'.*(Dynamic_IR_MEM2_TEST_0.90V)\s+(-?[\d.]+)\s*\w+', line, re.I):
Dynamic_IR_MEM2_TEST_H = re.match(r'.*(Dynamic_IR_MEM2_TEST_0.90V)\s+(-?[\d.]+)\s*\w+', line, re.I)
DC_P.setdefault(Dynamic_IR_MEM2_TEST_H.group(1), []).append(Dynamic_IR_MEM2_TEST_H.group(2))
# 匹配Dynamic_IR_MEM2_TEST_0.75V的值
elif re.match(r'.*(Dynamic_IR_MEM2_TEST_0.75V)\s+(-?[\d.]+)\s*\w+', line, re.I):
Dynamic_IR_MEM2_TEST_M = re.match(r'.*(Dynamic_IR_MEM2_TEST_0.75V)\s+(-?[\d.]+)\s*\w+', line, re.I)
DC_P.setdefault(Dynamic_IR_MEM2_TEST_M.group(1), []).append(Dynamic_IR_MEM2_TEST_M.group(2))
# 匹配Dynamic_IR_MEM2_TEST_0.60V的值
elif re.match(r'.*(Dynamic_IR_MEM2_TEST_0.60V)\s+(-?[\d.]+)\s*\w+', line, re.I):
Dynamic_IR_MEM2_TEST_L = re.match(r'.*(Dynamic_IR_MEM2_TEST_0.60V)\s+(-?[\d.]+)\s*\w+', line, re.I)
DC_P.setdefault(Dynamic_IR_MEM2_TEST_L.group(1), []).append(Dynamic_IR_MEM2_TEST_L.group(2))
# 匹配每个Function Item的Fail bit count的值
elif re.match(r'\s[\d.]+\s+(Fail bit count)\s*.\s*(\d+)', line, re.I):
FBC_P = re.match(r'\s[\d.]+\s+(Fail bit count)\s*.\s*(\d+)', line, re.I)
# 因为每颗Die的每个测试项的Fail bit count的值具有互异性, 所以需要全部抓取并存储, 不需要剔除
FBC_Set.append(FBC_P.group(2))
# 匹配Function Item, 并存储在字典FUNC_P中键为Item的值列表中,
elif re.match(r'\s+\d+\.00\s+\w?\s+([SMD].*V)', line, re.I):
Item_P = re.match(r'\s+\d+\.00\s+\w?\s+([SMD].*V)', line, re.I)
# 因为主程序的测试项具有重复性, 所以只抓取并存储一次即可, 这里需要用if剔除重复的
if Item_P.group(1) not in FUNC_Item:
FUNC_Item.append(Item_P.group(1))
length_FBC = len(FBC_Set)
length_Item = len(FUNC_Item)
for i in range(length_Item):
for j in range(1, int(length_FBC / length_Item) + 1):
FUNC_P.setdefault(FUNC_Item[i], []).append(FBC_Set[i + (j - 1) * length_Item])
# 利用等差数列通项公式an = a1 + (n-1)*d, 这里d = length_Item, n_max = length_FBC / length_Item
file.close()
# Excel_gen()用来处理单个txt文件生成的DataFrame, 从而把单个txt文件生成的DataFrame转化成Excel文件
def Excel_gen(DC_P_gen, FUNC_P_gen):
# 合并两个字典
my_data = {**DC_P_gen, **FUNC_P_gen}
# 创建DataFrame对象时, 可通过dtype参数直接指定列的数据类型
df = pd.DataFrame(my_data, dtype=float)
# 获取DataFrame列标签, 等价于cols = list(df.columns.values)
cols = list(df)
# print(cols)
# cols.index('Die_Y')获取标签'Die_Y'的索引值, 并用pop将'Bin'取出, 再用insert将'Bin'列插入'Die_Y'列的后一列
cols.insert(cols.index('Die_Y') + 1, cols.pop(cols.index('Bin')))
# 把Scan HV, Scan MV, Scan LV, March HV, March MV, March LV
# Data Retention 0 MV to LV 0.5V, Data Retention 1 MV to LV 0.5V
# 等测试项移到前面
cols.insert(cols.index('Isb DVDD18_DS_uA') + 1, cols.pop(cols.index('Scan HV')))
cols.insert(cols.index('Scan HV') + 1, cols.pop(cols.index('Scan MV')))
cols.insert(cols.index('Scan MV') + 1, cols.pop(cols.index('Scan LV')))
cols.insert(cols.index('Scan LV') + 1, cols.pop(cols.index('March HV')))
cols.insert(cols.index('March HV') + 1, cols.pop(cols.index('March MV')))
cols.insert(cols.index('March MV') + 1, cols.pop(cols.index('March LV')))
cols.insert(cols.index('March LV') + 1, cols.pop(cols.index('Data Retention 0 MV to LV 0.5V')))
cols.insert(cols.index('Data Retention 0 MV to LV 0.5V') + 1, cols.pop(cols.index('Data Retention 1 MV to LV 0.5V')))
# 用loc获取最新的df的DataFrame; : 表示获取所有行, cols表示获取cols包含的这些列
df = df.loc[:, cols]
# 单独用astype将DataFrame的'Wafer_ID'列的数据类型转换为int型
df['Wafer_ID'] = df['Wafer_ID'].astype(int)
# 生成一个df的DataFrame行索引设置成'Wafer_ID'的副本, 并将其赋给df_index
df_index = df.set_index('Wafer_ID')
row_index = list(df_index.index.values)
column_index = list(df_index.columns.values)
print('正在输出xlsx文件...')
for k in row_index:
# 注意当使用loc或者iloc对DataFrame类型的对象进行索引切片时, 当索引取出的是单行或单列时,
# 索引出的数据类型会发生转换, 变为Series
# df_output = df_index.loc[k, column_index]
# 如果df_output的类型为'Series', 则说明通过索引取出的是单行或单列, 也即该Wafer_ID只出现一次,
# 也说明该片Wafer只有一颗Die的数据(正常情况下是不会出现的, 除非是Debug情况下有可能只Debug了一颗Die),
# 这种情况下则通过逻辑索引进行索引切片, 逻辑索引切片不会发生数据类型的转变
# if isinstance(df_output, pd.Series):
df_output = df_index[df_index.index == k][column_index]
df_output.insert(0, 'Wafer_ID', k)
df_output.set_index('Lot_ID', inplace=True)
if len(str(k)) == 1:
path_str = '_W0'
elif len(str(k)) == 2:
path_str = '_W'
df_output.to_excel('D:/Output/' + df_output.index[0] + path_str + str(k) + '.xlsx')
wb = openpyxl.load_workbook('D:/Output/' + df_output.index[0] + path_str + str(k) + '.xlsx')
ws = wb.active
# 冻结窗口
ws.freeze_panes = 'F2'
# 字体微软雅黑, 加粗
font = openpyxl.styles.Font('微软雅黑', bold=True)
# 填充色为深橙色FF8C00
fill = openpyxl.styles.PatternFill(fill_type='solid', start_color='FF8C00')
# 计算每列最大宽度, 并存储在列表col_widths中
col_widths = []
for i in range(1, ws.max_column + 1):
# 定义初始列宽col_width, 并在每个行循环完成后重置
col_width = 1
# 从第二行开始, 因为第一行为列标签, 即不计算第一行的列标签的宽度, 后面让其自动换行即可
for j in range(2, ws.max_row + 1):
cell_value = ws.cell(row=j, column=i).value
# 中文占用多个字节, 需要分开处理
if isinstance(cell_value, str):
# gbk解码一个中文两字节, utf-8一个中文三字节, gbk合适
col_width_real = len(cell_value.encode('gbk'))
else:
col_width_real = len(str(cell_value))
if col_width < col_width_real:
col_width = col_width_real
col_widths.append(col_width)
# 设置列宽
for i in range(1, ws.max_column + 1):
# 将数字转化为列名, 26个字母以内也可以用[chr(i).upper() for i in range(97, 123)], 不用导入模块
col_name = get_column_letter(i)
# 设置列宽, 一般加两个字节宽度, 可以根据实际情况灵活调整
ws.column_dimensions[col_name].width = col_widths[i - 1] + 6
# 设置第一行的单元格格式
for i in range(1, ws.max_column + 1):
# 设置第一行的单元格为自动换行, 水平居中, 垂直居中
ws.cell(1, i).alignment = openpyxl.styles.Alignment(horizontal='center', vertical='center', wrap_text=True)
# 设置第一行的单元格字体为微软雅黑, 加粗
ws.cell(1, i).font = font
# 设置第一行的单元格填充颜色为深橙色FF8C00
ws.cell(1, i).fill = fill
wb.save('D:/Output/' + df_output.index[0] + path_str + str(k) + '.xlsx')
print('xlsx文件输出完毕!')
if __name__ == '__main__':
print('程序正在运行, 请稍等...')
DC_Para = {}
FUNC_Para = {}
file_lists = glob.glob('D:/Output/*.txt')
for Files in file_lists:
count = 1
DC_Para.clear()
FUNC_Para.clear()
print('=' * 30)
print('正在读取第{}/{}个txt文件...'.format(count, len(file_lists)))
read_txt(DC_Para, FUNC_Para, Files)
print('第{}/{}个txt文件读取完毕!'.format(count, len(file_lists)))
Excel_gen(DC_Para, FUNC_Para)
count += 1
print('程序运行完毕!')
print('=' * 30)