Python+pandas+openpyxl+正则表达式——把txt文件中的有效信息格式化输出成Excel文件

一、需求分析

1、需要从给定txt文件中提取有效信息。txt文件里面的无关内容很多, 靠人工逐条筛选并不现实, 这时需要利用Python正则表达式来抓取有效信息。txt文件的格式如下:

 0062.467              KLcin Cont                -0.2378 V [AWEM0]
 
 0062.485               dffin city                  -0.5893 V [BWEM1]
 
 0052.505               deslin test                -0.4378 V [CWE]
 
 0022.529               monday  morning      -0.5679 V [DME]
 
 0032.547               every  word                -0.4938 V [ELS]
 
 0042.564               Hello Word                -0.2567 V [FRA1]

注释:真实txt内容不便展示, 这里只象征性地展示txt内容的格式。

2、把txt里面的有效信息提取出来以后, 需要利用pandas和openpyxl把DataFrame数据格式化输出到Excel表格中, 并保存到本地。这里面涉及到利用openpyxl对表格样式进行调整, 包括单元格字体、颜色、边框、背景颜色填充、列宽调整、单元格内文字对齐方式、自动换行、冻结窗口, 以及列的插入、删除、排序等等。

3、利用Python中的glob库实现自动批量读取txt文件,并自动批量生成Excel文件,并保存到本地指定文件夹下。

二、Python代码实现如下:

import pandas as pd
import re
import openpyxl
from openpyxl.utils import get_column_letter
import glob


# Patterns used by read_txt(). They are compiled once at import time, are all
# case-insensitive, and are applied with match(), i.e. anchored at line start.
# The regex text is kept exactly as originally written (including the
# unescaped '.' inside names such as '0.90V', which matches any character).

# Die header line: captures Lot_ID, Wafer_ID, Die_X and Die_Y (in that order).
# NOTE(review): the trailing '.' needs one more character after the Y digits;
# if Y is the last thing on the line, the final digit is consumed by that '.'
# and Die_Y is truncated. Left unchanged because the real file format is not
# visible here - confirm against a real log.
_LOT_WAFER_DIE_RE = re.compile(
    r'.*(B\w\d+_\w\d|A\w\d+_\w\d|DEBUG_\w\d).*W.{2}\s*(\d+)\s*Die.*X.(\d+)\s*Y.(\d+).', re.I)

# "First Fail Bin" line: captures the text "Bin" (used as the key) and its number.
_BIN_RE = re.compile(r'\s*First Fail (Bin)\s*.\s*(\d+)', re.I)

# "Isb <rail>" lines: capture the measurement name, the value and the unit.
# Order matters only for first-match-wins: MEM0, MEM0_LS, MEM0_SD, MEM0_DS,
# MEM1..., MEM2..., DVDD18...
_ISB_RES = tuple(
    re.compile(r'.*(Isb ' + rail + suffix + r')\s+(-?[\d.]+)\s*(\w+)', re.I)
    for rail in ('DVDD_MEM0', 'DVDD_MEM1', 'DVDD_MEM2', 'DVDD18')
    for suffix in ('', '_LS', '_SD', '_DS')
)

# "<Static|Dynamic>_IR_<MEMn>_TEST_<voltage>" lines: capture name and value
# (the unit is matched but not captured).
_IR_TEST_RES = tuple(
    re.compile(r'.*(' + mode + '_IR_' + mem + '_TEST_' + volt + r')\s+(-?[\d.]+)\s*\w+', re.I)
    for mode in ('Static', 'Dynamic')
    for mem in ('MEM1', 'MEM2')
    for volt in ('0.90V', '0.75V', '0.60V')
)

# "Fail bit count" line of one function item: captures the count.
_FBC_RE = re.compile(r'\s[\d.]+\s+(Fail bit count)\s*.\s*(\d+)', re.I)

# Function item line: captures the item name (text starting with S, M or D
# and running up to the last 'V' on the line).
_ITEM_RE = re.compile(r'\s+\d+\.00\s+\w?\s+([SMD].*V)', re.I)


def _first_match(patterns, line):
    """Return the match of the first pattern in *patterns* that matches *line*, else None."""
    for pattern in patterns:
        found = pattern.match(line)
        if found:
            return found
    return None


def read_txt(DC_P, FUNC_P, files):
    """Parse one txt log file and append the extracted values to two dicts.

    Each line is tested against the pattern groups in a fixed order (die
    header, bin, Isb, IR test, fail bit count, function item); the first
    pattern that matches decides how the line is stored. Lines that match
    nothing are ignored. All values are stored as the matched strings.

    Args:
        DC_P: dict updated in place. Keys are created on first use and map
            to lists: 'Lot_ID', 'Wafer_ID', 'Die_X', 'Die_Y', 'Bin',
            '<Isb name>_<unit>' and the IR test names. Key text is taken
            from the line itself, so it keeps the casing used in the file.
        FUNC_P: dict updated in place. Maps each distinct function item name
            to the list of its "Fail bit count" values.
        files: path of the txt file to read (decoded as UTF-8).

    Returns:
        None. Results are delivered through DC_P and FUNC_P.
    """
    FUNC_Item = []  # distinct function item names, in order of first appearance
    FBC_Set = []    # every fail-bit-count value, in file order

    # The context manager closes the file even if a line fails to decode.
    with open(files, encoding='utf-8') as file:
        for line in file:
            found = _LOT_WAFER_DIE_RE.match(line)
            if found:
                for key, value in zip(('Lot_ID', 'Wafer_ID', 'Die_X', 'Die_Y'), found.groups()):
                    DC_P.setdefault(key, []).append(value)
                continue

            found = _BIN_RE.match(line)
            if found:
                DC_P.setdefault(found.group(1), []).append(found.group(2))
                continue

            found = _first_match(_ISB_RES, line)
            if found:
                # Key is "<name>_<unit>", e.g. 'Isb DVDD_MEM0_uA'.
                DC_P.setdefault(found.group(1) + '_' + found.group(3), []).append(found.group(2))
                continue

            found = _first_match(_IR_TEST_RES, line)
            if found:
                DC_P.setdefault(found.group(1), []).append(found.group(2))
                continue

            found = _FBC_RE.match(line)
            if found:
                # Fail bit counts differ per die and per item, so keep them all.
                FBC_Set.append(found.group(2))
                continue

            found = _ITEM_RE.match(line)
            if found and found.group(1) not in FUNC_Item:
                # The item list repeats for every die; store each name once.
                FUNC_Item.append(found.group(1))

    # Distribute the flat list of counts over the items. This assumes every
    # die reports its counts in the same item order, so item i owns positions
    # i, i + n, i + 2n, ... (n = number of items). Counts beyond the last
    # complete round of n values are dropped.
    n_items = len(FUNC_Item)
    if n_items:
        rounds = len(FBC_Set) // n_items
        if rounds:
            for offset, item in enumerate(FUNC_Item):
                FUNC_P.setdefault(item, []).extend(FBC_Set[offset:rounds * n_items:n_items])


# Function items that Excel_gen() moves directly behind the
# 'Isb DVDD18_DS_uA' column, in this order.
_FRONT_FUNC_ITEMS = (
    'Scan HV',
    'Scan MV',
    'Scan LV',
    'March HV',
    'March MV',
    'March LV',
    'Data Retention 0 MV to LV 0.5V',
    'Data Retention 1 MV to LV 0.5V',
)


def _style_worksheet(ws):
    """Apply freeze panes, column widths and header formatting to worksheet *ws*."""
    # Keep row 1 and columns A-E visible while scrolling.
    ws.freeze_panes = 'F2'

    # Header style: bold Microsoft YaHei on a dark-orange (FF8C00) fill,
    # centred both ways with wrapping enabled.
    header_font = openpyxl.styles.Font('微软雅黑', bold=True)
    header_fill = openpyxl.styles.PatternFill(fill_type='solid', start_color='FF8C00')
    header_alignment = openpyxl.styles.Alignment(horizontal='center', vertical='center', wrap_text=True)

    for col_idx in range(1, ws.max_column + 1):
        # Widest data cell of the column. Row 1 (the header) is skipped on
        # purpose: it wraps instead of widening the column.
        widest = 1
        for row_idx in range(2, ws.max_row + 1):
            cell_value = ws.cell(row=row_idx, column=col_idx).value
            if isinstance(cell_value, str):
                # Measure text in GBK bytes so a CJK character counts as two
                # columns; errors='replace' keeps characters outside GBK from
                # raising UnicodeEncodeError.
                shown = len(cell_value.encode('gbk', errors='replace'))
            else:
                shown = len(str(cell_value))
            widest = max(widest, shown)
        # Fixed padding of 6 on top of the widest value.
        ws.column_dimensions[get_column_letter(col_idx)].width = widest + 6

        header_cell = ws.cell(1, col_idx)
        header_cell.alignment = header_alignment
        header_cell.font = header_font
        header_cell.fill = header_fill


# Excel_gen() turns the data parsed from one txt file into formatted Excel files.
def Excel_gen(DC_P_gen, FUNC_P_gen, output_dir='D:/Output/'):
    """Write one formatted .xlsx file per wafer found in the parsed data.

    The two dicts are merged into a single DataFrame, the columns are
    reordered ('Bin' right after 'Die_Y'; the function items listed in
    _FRONT_FUNC_ITEMS right after 'Isb DVDD18_DS_uA'), and the rows of each
    distinct Wafer_ID are written to '<Lot_ID>_W<wafer id, 2 digits>.xlsx'
    with 'Lot_ID' as the index column followed by 'Wafer_ID'.

    Args:
        DC_P_gen: dict of column name -> list of values (as filled by
            read_txt); must contain 'Lot_ID', 'Wafer_ID', 'Die_Y', 'Bin'
            and 'Isb DVDD18_DS_uA'.
        FUNC_P_gen: dict of function item name -> list of values; must
            contain every name in _FRONT_FUNC_ITEMS.
        output_dir: directory the .xlsx files are written to. It is created
            if missing. Defaults to the original hard-coded 'D:/Output/'.

    Raises:
        ValueError: if one of the required column names is missing, or if
            the value lists do not all have the same length.
    """
    import os  # local import: the file's top-level imports do not include os

    # Merge the two dicts (FUNC_P_gen wins on a key clash).
    my_data = {**DC_P_gen, **FUNC_P_gen}

    # Convert every column that is fully numeric to float and leave text
    # columns such as Lot_ID untouched. Passing dtype=float to the DataFrame
    # constructor did this implicitly on old pandas but raises on pandas >= 2.0
    # because Lot_ID cannot be cast.
    df = pd.DataFrame(my_data)
    for name in df.columns:
        try:
            df[name] = df[name].astype(float)
        except (ValueError, TypeError):
            pass  # deliberately kept as text

    cols = list(df)

    # Move 'Bin' to the position right after 'Die_Y'.
    cols.insert(cols.index('Die_Y') + 1, cols.pop(cols.index('Bin')))

    # Chain the front function items one after another behind the last Isb column.
    anchor = 'Isb DVDD18_DS_uA'
    for item in _FRONT_FUNC_ITEMS:
        cols.insert(cols.index(anchor) + 1, cols.pop(cols.index(item)))
        anchor = item

    # All rows, columns in the new order.
    df = df.loc[:, cols]

    # Wafer ids are whole numbers; use int so they print without '.0'.
    df['Wafer_ID'] = df['Wafer_ID'].astype(int)

    df_index = df.set_index('Wafer_ID')
    column_index = list(df_index.columns.values)

    os.makedirs(output_dir, exist_ok=True)

    print('正在输出xlsx文件...')
    # Visit each wafer once. Iterating the raw index would repeat the id for
    # every die row and rewrite the same workbook that many times.
    for k in df_index.index.unique():
        # A boolean mask always yields a DataFrame, even when the wafer has a
        # single die row (label-based .loc would return a Series in that case).
        df_output = df_index[df_index.index == k][column_index].copy()

        df_output.insert(0, 'Wafer_ID', k)
        df_output.set_index('Lot_ID', inplace=True)

        # Zero-pad the wafer id to two digits ('_W05', '_W12'); ids with three
        # or more digits are written as they are instead of hitting an
        # undefined suffix.
        file_name = '{}_W{:02d}.xlsx'.format(df_output.index[0], int(k))
        xlsx_path = os.path.join(output_dir, file_name)

        # Let pandas write the data, then reopen the workbook to apply styling.
        df_output.to_excel(xlsx_path)
        wb = openpyxl.load_workbook(xlsx_path)
        _style_worksheet(wb.active)
        wb.save(xlsx_path)
    print('xlsx文件输出完毕!')


if __name__ == '__main__':
    # Batch driver: convert every txt file found in D:/Output/ to Excel files.

    print('程序正在运行, 请稍等...')
    DC_Para = {}
    FUNC_Para = {}

    # Sorted so the processing order (and the progress output) is deterministic.
    file_lists = sorted(glob.glob('D:/Output/*.txt'))
    total = len(file_lists)

    # enumerate() supplies the running file number. The previous manual
    # counter was reset to 1 at the top of every iteration, so the progress
    # message always showed "1/N".
    for count, Files in enumerate(file_lists, start=1):
        # Start each file with empty dicts so data from the previous file
        # does not leak into this one.
        DC_Para.clear()
        FUNC_Para.clear()

        print('=' * 30)
        print('正在读取第{}/{}个txt文件...'.format(count, total))
        read_txt(DC_Para, FUNC_Para, Files)
        print('第{}/{}个txt文件读取完毕!'.format(count, total))
        Excel_gen(DC_Para, FUNC_Para)

    print('程序运行完毕!')
    print('=' * 30)

三、这篇文章先讲从txt到Excel的格式化输出, 下篇文章再更新从Excel到可视化图表的内容, 实现数据的可视化分析!

你可能感兴趣的:(python,pycharm,正则表达式,pandas,numpy)