从fasta文件中批量提取序列

参考:《可以从fasta中提取基因序列的4款软件》(简书)

参考:《根据序列ID提取fasta序列》(周欣5518的博客,CSDN博客)

下载faSomeRecords脚本

下载地址:UCSC 下载服务器的 /admin/exe/linux.x86_64 目录(http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/),在其中找到 faSomeRecords

# Give the downloaded faSomeRecords file execute permission so it can be run
# on Linux.
chmod +x faSomeRecords  
# Activate the pfam_scan conda environment and install libssh2 into it.
# NOTE(review): presumably faSomeRecords needs libssh2 at run time — confirm.
# NOTE(review): the two installs request the same package from two different
# channels (conda-forge, then anaconda); presumably one of them is enough.
conda activate pfam_scan
conda install -c conda-forge libssh2
conda install -c anaconda libssh2

 使用示例

/media/aa/DATA/SZQ2/bj_software/faSomeRecords genome.all.fa ID.txt out.gene.fa   # an absolute path is used here; if the binary is run from its own directory instead, the leading ./ must be kept
# genome.all.fa is the original FASTA file that contains the genes of interest.
# ID.txt lists the gene IDs to look up and export, one per line.
# out.gene.fa is the output file holding the sequences of the listed IDs.
# Extract the cellulose, pectin and lignin gene sequences of every listed
# species with faSomeRecords: once from the compliantFasta inputs and once from
# the compliantFasta_CDS inputs. The per-species commands are written to a
# command list and executed by ParaFly.

# Path to the faSomeRecords executable.
fasomerecords=/media/aa/DATA/SZQ2/bj_software/faSomeRecords
# Input FASTA directories; each is read as <dir>/<species>.fasta.
src_fasta_dir=/media/aa/Expansion/szq2/bj/b.OrthoFinder/compliantFasta4/compliantFasta
src_cds_dir=/media/aa/Expansion/szq2/bj/b.OrthoFinder/compliantFasta_CDS4/compliantFasta_CDS
# Root directory holding one working directory per category.
cazymes_dir=/media/aa/Expansion/szq2/bj/cazymes

#######################################
# Build and run the faSomeRecords command list for one category and one
# output directory.
# Globals:   fasomerecords, cazymes_dir (read)
# Arguments: $1 - category name (cellulose, pectin, lignin); the ID files are
#                 read from <cazymes_dir>/<category>/<category>_id/<species>.txt
#            $2 - species list file name, located in <cazymes_dir>/<category>/,
#                 one species name per line
#            $3 - output sub-directory of the category (compliantFasta or
#                 compliantFasta_CDS)
#            $4 - directory holding the input <species>.fasta files
# Outputs:   writes command.faSomeRecords.list into the output directory and
#            leaves the shell's working directory there
# Returns:   1 if the output directory or the species list is unusable,
#            otherwise the exit status of ParaFly
#######################################
run_extraction() {
    local category=$1 list_name=$2 out_subdir=$3 src_dir=$4
    local out_dir=$cazymes_dir/$category/$out_subdir
    local species

    # Stop here rather than generate and run commands in the wrong directory.
    cd "$out_dir" || {
        printf 'run_extraction: cannot cd to %s\n' "$out_dir" >&2
        return 1
    }
    [[ -r ../$list_name ]] || {
        printf 'run_extraction: cannot read %s/../%s\n' "$out_dir" "$list_name" >&2
        return 1
    }

    # Read the list line by line so names are never glob-expanded; the
    # "|| [[ -n ... ]]" keeps a last line that has no trailing newline.
    while read -r species || [[ -n $species ]]; do
        [[ -n $species ]] || continue
        printf '%s\n' "$fasomerecords $src_dir/$species.fasta $cazymes_dir/$category/${category}_id/$species.txt $species.fasta"
    done < "../$list_name" > command.faSomeRecords.list

    ParaFly -c command.faSomeRecords.list -CPU 48
}

# cellulose
run_extraction cellulose 97list.txt compliantFasta "$src_fasta_dir"
run_extraction cellulose 97list.txt compliantFasta_CDS "$src_cds_dir"
# pectin (this category uses 96list.txt, not 97list.txt)
run_extraction pectin 96list.txt compliantFasta "$src_fasta_dir"
run_extraction pectin 96list.txt compliantFasta_CDS "$src_cds_dir"
# lignin
run_extraction lignin 97list.txt compliantFasta "$src_fasta_dir"
run_extraction lignin 97list.txt compliantFasta_CDS "$src_cds_dir"

all(将 cellulose、pectin、lignin 三类结果合并)

# Merge everything: for each species, concatenate its cellulose, pectin and
# lignin FASTA files into one file under the "all" category, for both the
# compliantFasta and the compliantFasta_CDS directories.

# Root directory holding one working directory per category.
cazymes_dir=/media/aa/Expansion/szq2/bj/cazymes

#######################################
# Build and run the per-species "cat" command list for one output directory.
# Globals:   cazymes_dir (read)
# Arguments: $1 - sub-directory name (compliantFasta or compliantFasta_CDS);
#                 used both for the three input categories and for "all"
# Outputs:   writes command.cat.list into <cazymes_dir>/all/<sub-directory>
#            and leaves the shell's working directory there
# Returns:   1 if the output directory or ../97list.txt is unusable,
#            otherwise the exit status of ParaFly
#######################################
merge_categories() {
    local out_subdir=$1
    local out_dir=$cazymes_dir/all/$out_subdir
    local species

    # Stop here rather than generate and run commands in the wrong directory.
    cd "$out_dir" || {
        printf 'merge_categories: cannot cd to %s\n' "$out_dir" >&2
        return 1
    }
    [[ -r ../97list.txt ]] || {
        printf 'merge_categories: cannot read %s/../97list.txt\n' "$out_dir" >&2
        return 1
    }

    # NOTE(review): pectin was extracted from 96list.txt while this loop uses
    # 97list.txt; presumably one species has no pectin file, in which case its
    # cat command reports a missing input — confirm this is intended.
    # Read the list line by line so names are never glob-expanded; the
    # "|| [[ -n ... ]]" keeps a last line that has no trailing newline.
    while read -r species || [[ -n $species ]]; do
        [[ -n $species ]] || continue
        printf '%s\n' "cat $cazymes_dir/cellulose/$out_subdir/$species.fasta $cazymes_dir/pectin/$out_subdir/$species.fasta $cazymes_dir/lignin/$out_subdir/$species.fasta > $species.fasta"
    done < ../97list.txt > command.cat.list

    ParaFly -c command.cat.list -CPU 48
}

merge_categories compliantFasta
merge_categories compliantFasta_CDS

你可能感兴趣的:(数据整理,linux,运维,服务器)