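# Input files:
#   sequence.gb  - GenBank record, downloaded from
#                  https://www.ncbi.nlm.nih.gov/nuccore/NC_000006.12?report=genbank
#   position.txt - one integer position per line
# Generated files:
#   HLA_CDS.fa
#   postion_HLA_seq.fa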
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
# Extract the CDS of the GenBank record in 'sequence.gb', save it as
# 'HLA_CDS.fa', then write one FASTA record per unique position listed in
# 'position.txt' containing the CDS bases surrounding that position.
#
# NOTE(review): the indentation of this script was lost; the nesting below is
# the most plausible reconstruction (in particular, whether the three-base
# debug print belongs inside the CDS branch) -- confirm against intent.

# Window taken around each position: [pos - UPSTREAM, pos + DOWNSTREAM).
UPSTREAM = 50
DOWNSTREAM = 49

records = SeqIO.read('sequence.gb', 'gb')
features = records.features

rec = None
for feature in features:
    if feature.type == "CDS":
        # NOTE(review): if the record holds several CDS features, each one
        # overwrites 'HLA_CDS.fa' and `rec`; only the last one is used below.
        rec = SeqRecord(feature.location.extract(records).seq)
        # int() on a position object yields the same integer as the legacy
        # ``.position`` accessor without depending on that attribute.
        print('{} {}'.format(int(feature.location.start),
                             int(feature.location.end)))
        rec.id = 'HLA_CDS'
        SeqIO.write(rec, 'HLA_CDS.fa', 'fasta')
        # Spot-check of three individual bases of the extracted CDS.
        print('{} {} {}'.format(rec.seq[362], rec.seq[96], rec.seq[602]))

if rec is None:
    # Fail with a clear message instead of a NameError further down.
    raise ValueError("no CDS feature found in 'sequence.gb'")

with open('position.txt') as fp1:
    # Parse to int before de-duplicating, skip blank lines, and sort so the
    # output order is deterministic.
    pos_uniq = sorted({int(line) for line in fp1 if line.strip()})
print(len(pos_uniq))

seq_len = len(rec.seq)
with open('postion_HLA_seq.fa', 'w') as fp:
    for pos in pos_uniq:
        # Clamp the window to the sequence: a negative slice start would wrap
        # around to the end of the sequence and silently drop the upstream
        # bases for positions closer than UPSTREAM to the start.
        start = min(max(pos - UPSTREAM, 0), seq_len)
        end = min(max(pos + DOWNSTREAM, start), seq_len)
        tmpRes = SeqRecord(rec.seq[start:end])
        # The header reports the window actually extracted.
        # NOTE(review): the id contains a space, so FASTA readers will
        # typically treat everything after it as the description.
        tmpRes.id = 'HLA_POS_CDS_{}: {}-{}'.format(pos, start, end)
        tmpRes.description = ''
        SeqIO.write(tmpRes, fp, 'fasta')