【实践6】Python按关键字提取txt文本并保存到Excel(2)

第一种方法在另一个帖子: link

后来又遇到同样需要切割txt文档的需求,这次改用了正则表达式,发现正则更方便。还没时间整理,直接放源码。因为是刚接触Python时写的,很多包还不熟悉,方法也不成熟,大家凑合着看。

#-*- coding: utf-8 -*-
# Delimiter keywords: readTXT() captures the text between w1 and w2,
# between w2 and w3, and between w3 and w4.
w1 = '新增'     # start of the first captured span
w2 = '负责人'
w3 = '截止日期'
# w4 = '号'   (alternative terminator, currently disabled)
w4 = '日'
import re
import xlsxwriter
txt_url = r"D:\文本记录1.txt"      # path of the txt file to read
sava_url = r"D:\文本纪录2.xlsx"    # path of the Excel workbook to write
# 读取txt 并提取指定字符
def readTXT(path=None, keywords=None):
    """Extract the text found between consecutive keywords of a txt file.

    Args:
        path: Path of the UTF-8 text file to read. Defaults to the
            module-level ``txt_url``.
        keywords: Sequence of four delimiters ``(k1, k2, k3, k4)``.
            Defaults to the module-level ``(w1, w2, w3, w4)``. The
            delimiters are inserted into the regular expressions as-is
            (not escaped).

    Returns:
        A tuple ``(plans, owners, dates)`` of three lists of strings:

        * ``plans``  - every span between ``k1`` and ``k2``, unmodified;
        * ``owners`` - every span between ``k2`` and ``k3`` with the
          punctuation marks listed below removed;
        * ``dates``  - every span between ``k3`` and ``k4`` with the year
          and month markers replaced by ``/``.

        The lists are built independently, so their lengths may differ.
    """
    if path is None:
        path = txt_url
    if keywords is None:
        keywords = (w1, w2, w3, w4)
    k1, k2, k3, k4 = keywords

    with open(path, 'r', encoding='utf-8') as f:
        content = f.read()

    def between(start, end):
        # Non-greedy capture; re.S lets a span run across line breaks.
        return re.compile(start + '(.*?)' + end, re.S).findall(content)

    plans = between(k1, k2)
    raw_owners = between(k2, k3)
    raw_dates = between(k3, k4)

    # Punctuation stripped from owner names. The marks are removed one
    # after another, in this order.
    punctuation = [",", "《", "。", "》", "、","?",";",":","’","“","【","{","】","}","·","~","!","@","#","¥","%","……","&","*","(",")",".","-","/","+"]
    owners = []
    for owner in raw_owners:
        for mark in punctuation:
            owner = owner.replace(mark, "")
        owners.append(owner)

    # Turn the year and month markers into "/" separators; the day marker
    # is the closing keyword and is therefore never part of the capture
    # when the default keywords are used.
    dates = []
    for date in raw_dates:
        for marker in ("年", "月"):
            date = date.replace(marker, "/")
        dates.append(date)

    return plans, owners, dates

def saveExcel(str1, str2, str3=None, path=None):
    """Write the extracted columns to a new xlsx workbook.

    Row 0 holds the column titles; item ``k`` (0-based) of each input list
    is written to row ``k + 1``.

    Args:
        str1: Plan texts, written to the ``Issue`` column (column 2). One
            full row (``NO``, ``Dept``, ``Action``, ``Link``, ``Status``
            placeholders) is written per item of this list.
        str2: Owner names, written to the ``PIC`` column (column 5).
        str3: Due dates, written to the ``Due Day`` column (column 6).
            When omitted, the module-level ``str3`` is used, which keeps
            the two-argument call working.
        path: Destination workbook path. Defaults to the module-level
            ``sava_url``.

    Raises:
        NameError: If ``str3`` is omitted and no module-level ``str3``
            exists.
    """
    if str3 is None:
        # Backward compatibility: earlier versions read the module-level
        # ``str3`` directly instead of taking it as an argument.
        try:
            str3 = globals()['str3']
        except KeyError:
            raise NameError("name 'str3' is not defined") from None
    if path is None:
        path = sava_url

    xl = xlsxwriter.Workbook(path)
    sheet = xl.add_worksheet('sheet1')

    # Header row.
    headers = ('NO', 'Dept', 'Issue', 'Action', 'Link', 'PIC', 'Due Day', 'Status')
    for col, title in enumerate(headers):
        sheet.write(0, col, title)

    # Data rows start at row 1 and simply keep counting upwards. The row
    # counter is never reset, so long inputs do not overwrite earlier rows.
    for row, plan in enumerate(str1, start=1):
        sheet.write(row, 0, row)          # running number
        sheet.write(row, 1, '-')          # department placeholder
        sheet.write(row, 2, plan)         # plan text
        sheet.write(row, 3, '-')          # action placeholder
        sheet.write(row, 4, '-')          # link placeholder
        sheet.write(row, 7, 'Ongoing')    # status
    for row, owner in enumerate(str2, start=1):
        sheet.write(row, 5, owner)
    for row, due in enumerate(str3, start=1):
        sheet.write(row, 6, due)

    # The workbook file is produced when it is closed.
    xl.close()
# Run the extraction, echo the three result lists (one per line), then
# export them. saveExcel() is given only the first two lists; the due dates
# reach it through the module-level name str3 assigned here.
str1, str2, str3 = readTXT()
print(str1, str2, str3, sep="\n")
saveExcel(str1, str2)

要提取的是【新增】和【负责人】之间、【负责人】与【截止日期】之间、【截止日期】与【日】之间的句子。正则表达式中括号里的部分‘(.*?)’就是要提取的内容:它是非贪婪匹配,配合 re.S 可以跨行匹配。

你可能感兴趣的:(python,开发语言)