Python 遍历文件夹,提取每个文件中的某几行内容,并去除其中所有数字的代码

import os
import re

# Step 1: from every file in the folder, pull out the line that sits directly
# above the file's last blank line and append it to all.txt.
SCAN_DIR = "C:/地址"
MERGED_NAME = "all.txt"

# A disabled variant from the original write-up merged every file in full:
#     for name in os.listdir(SCAN_DIR):
#         with open(SCAN_DIR + "/" + name) as f:
#             for line in f.readlines():
#                 with open(SCAN_DIR + "/all.txt", "a") as p:
#                     p.write(line)


def line_before_last_blank(lines):
    """Return the line directly above the last blank line of *lines*.

    A line counts as blank only when it consists of a single newline
    character. Returns ``None`` when there is no blank line at all, or when
    the last blank line is the very first line (nothing sits above it).

    NOTE(review): the original heading talks about "the content between the
    last two blank lines"; the code has always taken exactly one line, which
    is the same thing only when that section is one line long - confirm.
    """
    last_blank = 0  # 1-based number of the last blank line; 0 means none seen
    for number, text in enumerate(lines, start=1):
        if text == "\n":
            last_blank = number
    if last_blank < 2:
        return None
    return lines[last_blank - 2]


def extract_from_folder(folder, merged_name):
    """Append the selected line of every file in *folder* to the merged file.

    The merged file lives inside *folder* itself, so it is skipped as an
    input; otherwise a second run would feed the previous output back in.
    Entries that are not regular files are skipped as well.
    """
    entries = os.listdir(folder)
    print(entries)
    merged_path = folder + "/" + merged_name
    for name in entries:
        print(name)
        path = folder + "/" + name
        if name == merged_name or not os.path.isfile(path):
            continue
        # One pass over the file is enough: read it once, then pick the line.
        with open(path) as src:
            picked = line_before_last_blank(src.readlines())
        if picked is None:
            continue
        print(picked)
        with open(merged_path, "a") as out:
            out.write(picked)


# Guarded so that importing this file (e.g. to test the helper above) does
# not touch the disk; running it as a script behaves as before.
if __name__ == "__main__":
    extract_from_folder(SCAN_DIR, MERGED_NAME)
# Step 2: delete digits and punctuation from every line.
# The character class is kept as published, only quoted as a raw string and
# with "-", "[" and "]" escaped so they are unambiguously literal.
# NOTE(review): the ASCII double quote is not part of the set - confirm that
# it was not lost when the snippet was pasted.
remove_chars = r"[0-9’!#$%&'()*+,\-./:;<=>?@,。?★、…【】《》?“”‘’!\[\]^_`{|}~]+"
REMOVE_CHARS_RE = re.compile(remove_chars)


def strip_digits_and_punctuation(text):
    """Return *text* with every digit/punctuation character removed.

    Letters, whitespace and line breaks are left untouched.
    """
    return REMOVE_CHARS_RE.sub("", text)


# Guarded so that importing this file does not touch the disk.
# NOTE(review): this reads all.txt from a different folder than the one the
# extraction step above writes to - confirm which path is intended.
if __name__ == "__main__":
    with open("C:/Users/ASUS/Downloads/casp11.T0_all.RR/all.txt") as h, \
            open("C:/地址/ss.txt", "a") as q:
        # Write through this section's own handle, opened once for the run.
        q.writelines(strip_digits_and_punctuation(line) for line in h)
# Step 3: drop everything from ">" to the end of a header line and join the
# wrapped lines of each record into one line, keeping one line per record.
# (Presumably the input is FASTA-style: ">" header lines followed by wrapped
# sequence lines - confirm against the data.)
def flatten_record_line(line):
    """Convert one input line into the text to append to the output.

    * A line containing ">" and ending in a newline yields whatever precedes
      the ">" plus a single newline, which starts a new record.
    * Any other line is returned without its newline, so consecutive
      sequence lines end up concatenated.

    No placeholder character is used for the record break, so a literal
    comma in the data is passed through unchanged.
    """
    head, marker, rest = line.partition(">")
    if marker and rest.endswith("\n"):
        return head + "\n"
    return line.replace("\n", "")


# Guarded so that importing this file does not touch the disk.
if __name__ == "__main__":
    with open("C:/Users/ASUS/Downloads/casp12.seq.txt") as f, \
            open("C:/Users/ASUS/Downloads/seq12.txt", "a") as p:
        # The output is opened once instead of once per input line.
        p.writelines(flatten_record_line(line) for line in f)
# Step 4: cut every line into overlapping groups of five characters, one
# group per output line.
def sliding_windows(line, size=5):
    """Return every run of *size* consecutive characters in *line*.

    The trailing newline, if any, is not part of any window. A line shorter
    than *size* yields an empty list. Windows start at offset 0, so the first
    group of the line is included.
    """
    seq = line.rstrip("\n")
    return [seq[start:start + size] for start in range(len(seq) - size + 1)]


# Guarded so that importing this file does not touch the disk.
if __name__ == "__main__":
    with open("C:/Users/ASUS/Downloads/seq12.txt") as f1, \
            open("C:/Users/ASUS/Downloads/unlabelaa.txt", "a") as f2:
        for line in f1:
            for window in sliding_windows(line):
                # The same window is echoed and written.
                print(window)
                f2.write(window + '\n')
# Step 5: replace each residue letter by its index followed by a space.
# Index = position of the letter in RESIDUE_ORDER (A -> 0, C -> 1, ... Y -> 19).
RESIDUE_ORDER = "ACDEFGHIKLMNPQRSTVWY"
RESIDUE_TABLE = str.maketrans(
    {letter: str(index) + " " for index, letter in enumerate(RESIDUE_ORDER)}
)


def encode_residues(line):
    """Return *line* with each of the 20 letters replaced by "<index> ".

    Every other character, including the newline, is passed through
    unchanged.
    """
    return line.translate(RESIDUE_TABLE)


# Guarded so that importing this file does not touch the disk.
if __name__ == "__main__":
    with open("C:/Users/ASUS/Downloads/unlabelaa.txt") as f3, \
            open("C:/Users/ASUS/Downloads/unlabelnum.txt", "a") as f4:
        for line in f3:
            words = encode_residues(line)
            print(words)
            f4.write(words)
# Step 6: add 20 * position to every number of a line (position 0 gets +0,
# position 1 gets +20, ...), and start each output line with a literal "0 ".
def add_position_offsets(line, step=20):
    """Return the output line for one line of space-separated integers.

    The result is "0 " followed by ``value + step * position`` for every
    number, each followed by a space, then a newline. Tokens are split on any
    whitespace, so a missing trailing space or newline does not drop the last
    number.

    Raises:
        ValueError: if a token is not an integer.
    """
    values = [int(token) for token in line.split()]
    shifted = [str(value + step * pos) for pos, value in enumerate(values)]
    return "0 " + "".join(item + " " for item in shifted) + "\n"


# Guarded so that importing this file does not touch the disk.
if __name__ == "__main__":
    with open("C:/Users/ASUS/Downloads/unlabelnum.txt") as f5, \
            open("C:/Users/ASUS/Downloads/aa.txt", "a") as f6:
        # One open for the whole run instead of several per input line.
        f6.writelines(add_position_offsets(line) for line in f5)

本人初学 Python,只会用 with open,还不会写函数,代码写得比较啰嗦。如果有更好的方法,请多多指教!

你可能感兴趣的:(python)