from __future__ import division
import re
# Echo the raw FASTA file, one line per print() call.  Lines read from the
# file keep their own line ending, so the output comes out double-spaced.
with open('sequence01.fasta') as file:
    for line in file:
        print(line)
# Exploratory parse of the FASTA file straight into a module-level dict:
# header text (without the leading ">") -> upper-cased sequence.
fasta = {}
with open('sequence01.fasta') as file:
    sequence = ""  # assigned but never read in this block
    for line in file:
        if not line.startswith(">"):
            # Sequence line: append it to the record of the latest header.
            fasta[name] += line.rstrip().upper()
        else:
            # Header line: remember it and start an empty record for it.
            name = line[1:].rstrip()
            print(name)
            fasta[name] = ''

# Show every record, then only the one under the last header that was read.
# `name` stays bound at module level and is reused by later statements.
print(fasta)
print(fasta[name])
def get_fasta(fasta_path):
    """Parse a FASTA file into a dict mapping header text to sequence.

    A header is everything after the leading ">" with trailing whitespace
    removed.  Every following line is stripped of trailing whitespace,
    upper-cased and appended to the record of the most recent header.
    Blank lines are ignored.  If the same header occurs more than once,
    the later record replaces the earlier one.

    Args:
        fasta_path: Path of the FASTA file to read.

    Returns:
        dict: ``{header: sequence}``.

    Raises:
        ValueError: If non-blank sequence data appears before the first
            header line.
    """
    chunks_by_header = {}
    name = None
    with open(fasta_path) as fasta_file:
        for line_number, raw_line in enumerate(fasta_file, 1):
            line = raw_line.rstrip()
            if line.startswith(">"):
                name = line[1:]
                chunks_by_header[name] = []
                continue
            if not line:
                # Blank lines carry no sequence data, wherever they occur.
                continue
            if name is None:
                raise ValueError(
                    "{0}: line {1}: sequence data before the first '>' header"
                    .format(fasta_path, line_number)
                )
            chunks_by_header[name].append(line.upper())
    # Join once per record instead of growing a string line by line.
    return {
        header: "".join(chunks)
        for header, chunks in chunks_by_header.items()
    }
# Load sequence01.fasta through get_fasta() and show the whole mapping,
# then only the record stored under `name`.
# NOTE(review): `name` is not assigned here; it is the module-level value
# left behind by the header loop earlier in the file.
get_fasta_result = get_fasta('sequence01.fasta')
print(get_fasta_result)
print(get_fasta_result[name])
def nt_count(seq):
    """Count the four DNA bases in *seq*.

    Returns:
        list: Occurrence counts in the fixed order ``[A, C, G, T]``.
        Only these upper-case letters are counted; every other
        character is ignored.
    """
    return [seq.count(base) for base in "ACGT"]
# Base composition ([A, C, G, T] counts) of the record stored under `name`.
seq = get_fasta_result[name]
nt_count_result = nt_count(seq)
print(nt_count_result)
def cg_content(seq):
    """Return the GC content of *seq* as a percentage string.

    Only upper-case ``G`` and ``C`` are counted; the denominator is the
    full length of *seq*.

    Args:
        seq: Nucleotide sequence.

    Returns:
        str: The percentage formatted with six decimals, e.g.
        ``'50.000000'``.  An empty sequence yields ``'0.000000'``
        instead of raising ``ZeroDivisionError``.
    """
    total = len(seq)
    if total == 0:
        # Nothing to measure: report 0 % rather than dividing by zero.
        return format(0.0, '.6f')
    gc_count = seq.count('G') + seq.count('C')
    # float() keeps this a true division on its own, without relying on
    # the file-level ``from __future__ import division``.
    return format(float(gc_count) / total * 100, '.6f')
# GC percentage (six-decimal string) of the record stored under `name`.
seq = get_fasta_result[name]
cg_content_result = cg_content(seq)
print(cg_content_result)
def dna_trans_rna(seq):
    """Transcribe a DNA string to RNA by replacing every ``T`` with ``U``.

    Only upper-case ``T`` is replaced; all other characters are returned
    unchanged.

    Args:
        seq: DNA sequence string.

    Returns:
        str: The sequence with ``T`` replaced by ``U``.
    """
    # A literal one-character substitution needs no regular expression.
    return seq.replace('T', 'U')
# Transcribe the record stored under `name` to RNA (T -> U) and show it.
seq = get_fasta_result[name]
dna_trans_rna_result = dna_trans_rna(seq)
print(dna_trans_rna_result)
def rna_trans_protein(rnaSeq):
    """Translate an RNA string into one-letter amino-acid codes.

    The sequence is read from position 0 in consecutive, non-overlapping
    triplets.  The stop codons (UAA, UAG, UGA) map to the empty string,
    so they add nothing to the result and reading continues past them.
    Triplets missing from the table, including a trailing fragment
    shorter than three bases, are skipped.

    Args:
        rnaSeq: RNA sequence written with upper-case A, C, G, U.

    Returns:
        str: The concatenated amino-acid letters.
    """
    amino_acid_for = {
        # First base U
        'UUU': 'F', 'UUC': 'F', 'UUA': 'L', 'UUG': 'L',
        'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S',
        'UAU': 'Y', 'UAC': 'Y', 'UAA': '', 'UAG': '',
        'UGU': 'C', 'UGC': 'C', 'UGA': '', 'UGG': 'W',
        # First base C
        'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L',
        'CCU': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
        'CAU': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
        'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        # First base A
        'AUU': 'I', 'AUC': 'I', 'AUA': 'I', 'AUG': 'M',
        'ACU': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
        'AAU': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
        'AGU': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
        # First base G
        'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V',
        'GCU': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
        'GAU': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
        'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
    }
    triplets = (
        rnaSeq[start:start + 3] for start in range(0, len(rnaSeq), 3)
    )
    # Unknown triplets contribute "" exactly like the stop codons do.
    return "".join(amino_acid_for.get(triplet, "") for triplet in triplets)
# Translate the transcribed RNA into amino-acid letters and show the result.
rnaSeq = dna_trans_rna_result
rna_trans_protein_result = rna_trans_protein(rnaSeq)
print(rna_trans_protein_result)
def reverse_comple(type, seq):
seq = seq[::-1]
dnaTable = {
"A":"T", "T":"A", "C":"G", "G":"C"
}
rnaTable = {
"A": "T", "U": "A", "C": "G", "G": "C"
}
res = ""
if type == "dna":
for ele in seq:
if ele in seq:
if type == "dna":
res += dnaTable[ele]
else:
res += rnaTable[ele]
return res
# Reverse complement of the DNA record stored under `name`.
type1 = "dna"
seq1 = get_fasta_result[name]
dna_reverse_comple_result = reverse_comple(type1, seq1)
print(dna_reverse_comple_result)

# Same operation on the transcribed RNA sequence.
type2 = "rna"
seq2 = dna_trans_rna_result
rna_reverse_comple_result = reverse_comple(type2, seq2)
print(rna_reverse_comple_result)
[1]: https://blog.csdn.net/u011262253/article/details/88542804
[2]: https://www.ncbi.nlm.nih.gov/nuccore/NC_000006.12?report=fasta&from=31164337&to=31170682&strand=true