#!/usr/bin/python3
import sys
import re
from reverse_seq import *
def format_seq(seq, num):
    """Wrap a sequence into lines of at most ``num`` characters.

    Args:
        seq: The sequence string to wrap.
        num: Maximum number of characters per output line.

    Returns:
        The chunks of ``seq`` in order, each followed by a newline.
        An empty ``seq`` yields an empty string.

    Raises:
        ValueError: If ``num`` is zero (raised by ``range``).
    """
    # Stepping through the string by ``num`` yields only non-empty chunks, so
    # a length that is an exact multiple of ``num`` (or an empty sequence) does
    # not get a spurious blank line appended to the output.
    return "".join(
        seq[start:start + num] + "\n" for start in range(0, len(seq), num)
    )
if __name__ == '__main__':
    # Read a FASTA file record by record. For each record, write a header line
    # holding only the accession, followed by the reversed result of
    # translate_seq() wrapped at 70 characters per line.
    # NOTE(review): translate_seq is not defined in this file; presumably it
    # comes from `from reverse_seq import *` and complements the bases --
    # confirm against that module.
    #
    # Example header line:
    # >lcl|JAKRYI020000014.1_cds_KAI4295232.1_1 [locus_tag=L6164_035299] [protein=hypothetical protein] [protein_id=KAI4295232.1] [location=join(11145..11254,12514..12625,13047..13118,13225..13436,13525..13633)] [gbkey=CDS]
    #
    # The capture group is the accession with its full version number. `\d+`
    # keeps multi-digit versions intact and `.*` also accepts a header that
    # consists of nothing but the accession.
    header_re = re.compile(r">\w{3}\|(\w+\.\d+).*")

    # `with` guarantees both files are closed even if parsing raises.
    with open(r"D:\sequence.fasta", "r") as seq_file, \
            open(r"D:\sequence_rervese_complement.fasta", "w") as out_file:
        # Sequence lines of the current record; joined once per record instead
        # of growing a string with repeated concatenation.
        seq_parts = []
        for eachline in seq_file:
            eachline = eachline.strip()
            if not eachline:
                # Blank lines (e.g. a trailing one) carry no data; indexing
                # into them would raise IndexError.
                continue
            if eachline.startswith(">"):
                line_match = header_re.match(eachline)
                if line_match is None:
                    raise ValueError(
                        "Unrecognised FASTA header: {!r}".format(eachline)
                    )
                print("1:", line_match.group())
                print("2:", line_match.group(1))
                reads_name = line_match.group(1)
                if seq_parts:
                    # A previous record is pending: emit its sequence before
                    # starting the new record.
                    trans_seq = translate_seq("".join(seq_parts))
                    rseq = trans_seq[::-1]
                    out_file.write(format_seq(rseq, 70))
                    seq_parts = []  # reset for the record that starts here
                # Record name: keep only the accession.
                out_file.write(">" + reads_name + "\n")
            else:
                seq_parts.append(eachline)

        # Emit the sequence of the last record, which no following header
        # line triggers.
        trans_seq = translate_seq("".join(seq_parts))
        rseq = trans_seq[::-1]
        out_file.write(format_seq(rseq, 70))