# Sometimes we need to know the length of one specific chromosome (scaffold) in a FASTA file.
#!/usr/bin/python
# -*- coding: utf-8 -*-
#coding=utf-8
import sys,re
def scaffold_length(fasta_path, scaffold_num):
    """Return the sequence length of ``HiC_scaffold_<scaffold_num>`` in a FASTA file.

    Args:
        fasta_path: Path of the FASTA file to scan.
        scaffold_num: Integer ID of the scaffold, e.g. ``1`` selects the
            record whose header starts with ``>HiC_scaffold_1``.

    Returns:
        The number of characters on the sequence lines of the matching
        record(s), line terminators excluded. ``0`` when no header matches.
    """
    # The negative lookahead keeps ID 1 from also matching 10, 11, 100, ...
    # while still accepting any non-digit text after the ID.
    target_header = re.compile(r">HiC_scaffold_%d(?!\d)" % scaffold_num)
    total = 0
    in_target = False
    with open(fasta_path, "r") as handle:
        for line in handle:
            if line.startswith(">"):
                # Every header starts a new record, so counting stops at the
                # next header whatever its ID is, and header text itself is
                # never added to the total.
                in_target = target_header.match(line) is not None
            elif in_target:
                # Strip only the line terminator so that a final line
                # without a trailing newline is not undercounted.
                total += len(line.rstrip("\r\n"))
    return total


def main(argv):
    """Command-line entry point: ``script.py <fasta_file> <scaffold_id>``."""
    if len(argv) < 3:
        sys.exit("usage: %s <fasta_file> <scaffold_id>" % argv[0])
    fasta_path = argv[1]  # FASTA file
    num = int(argv[2])  # scaffold ID, e.g. pass 1 for the first chromosome
    print("HiC_scaffold_%d\t%dbp" % (num, scaffold_length(fasta_path, num)))


if __name__ == "__main__":
    main(sys.argv)