RNA_Seq差异表达分析流程

RNA_Seq差异表达分析流程

1、数据下载

ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/005/SRR1228245/SRR1228245_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/005/SRR1228245/SRR1228245_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/006/SRR1228246/SRR1228246_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/006/SRR1228246/SRR1228246_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/007/SRR1228247/SRR1228247_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/007/SRR1228247/SRR1228247_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/008/SRR1228248/SRR1228248_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/008/SRR1228248/SRR1228248_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/009/SRR1228249/SRR1228249_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/009/SRR1228249/SRR1228249_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/000/SRR1228250/SRR1228250_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/000/SRR1228250/SRR1228250_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/001/SRR1228251/SRR1228251_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/001/SRR1228251/SRR1228251_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/002/SRR1228252/SRR1228252_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/002/SRR1228252/SRR1228252_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/003/SRR1228253/SRR1228253_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/003/SRR1228253/SRR1228253_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/004/SRR1228254/SRR1228254_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/004/SRR1228254/SRR1228254_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/005/SRR1228255/SRR1228255_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/005/SRR1228255/SRR1228255_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/006/SRR1228256/SRR1228256_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/006/SRR1228256/SRR1228256_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/007/SRR1228257/SRR1228257_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/007/SRR1228257/SRR1228257_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/008/SRR1228258/SRR1228258_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/008/SRR1228258/SRR1228258_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/009/SRR1228259/SRR1228259_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/009/SRR1228259/SRR1228259_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/000/SRR1228260/SRR1228260_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/000/SRR1228260/SRR1228260_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/001/SRR1228261/SRR1228261.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/001/SRR1228261/SRR1228261_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/001/SRR1228261/SRR1228261_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/002/SRR1228262/SRR1228262_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/002/SRR1228262/SRR1228262_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/003/SRR1228263/SRR1228263_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/003/SRR1228263/SRR1228263_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/004/SRR1228264/SRR1228264_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/004/SRR1228264/SRR1228264_2.fastq.gz
ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/005/SRR1228265/SRR1228265_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR122/005/SRR1228265/SRR1228265_2.fastq.gz

整理成输入格式 input.txt(每行6列,以逗号分隔:原始的双端fastq文件名、去接头后输出的fq文件名、以及second.clean文件名;下面的脚本只用到前4列)

SRR1228245_1.fastq.gz,SRR1228245_2.fastq.gz,SRR1228245_1.fq,SRR1228245_2.fq,SRR1228245_1.second.clean.fq.gz,SRR1228245_2.second.clean.fq.gz
SRR1228246_1.fastq.gz,SRR1228246_2.fastq.gz,SRR1228246_1.fq,SRR1228246_2.fq,SRR1228246_1.second.clean.fq.gz,SRR1228246_2.second.clean.fq.gz
SRR1228247_1.fastq.gz,SRR1228247_2.fastq.gz,SRR1228247_1.fq,SRR1228247_2.fq,SRR1228247_1.second.clean.fq.gz,SRR1228247_2.second.clean.fq.gz
SRR1228252_1.fastq.gz,SRR1228252_2.fastq.gz,SRR1228252_1.fq,SRR1228252_2.fq,SRR1228252_1.second.clean.fq.gz,SRR1228252_2.second.clean.fq.gz
SRR1228253_1.fastq.gz,SRR1228253_2.fastq.gz,SRR1228253_1.fq,SRR1228253_2.fq,SRR1228253_1.second.clean.fq.gz,SRR1228253_2.second.clean.fq.gz
SRR1228261_1.fastq.gz,SRR1228261_2.fastq.gz,SRR1228261_1.fq,SRR1228261_2.fq,SRR1228261_1.second.clean.fq.gz,SRR1228261_2.second.clean.fq.gz
SRR1228262_1.fastq.gz,SRR1228262_2.fastq.gz,SRR1228262_1.fq,SRR1228262_2.fq,SRR1228262_1.second.clean.fq.gz,SRR1228262_2.second.clean.fq.gz
SRR1228263_1.fastq.gz,SRR1228263_2.fastq.gz,SRR1228263_1.fq,SRR1228263_2.fq,SRR1228263_1.second.clean.fq.gz,SRR1228263_2.second.clean.fq.gz
SRR1228265_1.fastq.gz,SRR1228265_2.fastq.gz,SRR1228265_1.fq,SRR1228265_2.fq,SRR1228265_1.second.clean.fq.gz,SRR1228265_2.second.clean.fq.gz
SRR1228250_1.fastq.gz,SRR1228250_2.fastq.gz,SRR1228250_1.fq,SRR1228250_2.fq,SRR1228250_1.second.clean.fq.gz,SRR1228250_2.second.clean.fq.gz
SRR1228248_1.fastq.gz,SRR1228248_2.fastq.gz,SRR1228248_1.fq,SRR1228248_2.fq,SRR1228248_1.second.clean.fq.gz,SRR1228248_2.second.clean.fq.gz
SRR1228249_1.fastq.gz,SRR1228249_2.fastq.gz,SRR1228249_1.fq,SRR1228249_2.fq,SRR1228249_1.second.clean.fq.gz,SRR1228249_2.second.clean.fq.gz
SRR1228251_1.fastq.gz,SRR1228251_2.fastq.gz,SRR1228251_1.fq,SRR1228251_2.fq,SRR1228251_1.second.clean.fq.gz,SRR1228251_2.second.clean.fq.gz
SRR1228254_1.fastq.gz,SRR1228254_2.fastq.gz,SRR1228254_1.fq,SRR1228254_2.fq,SRR1228254_1.second.clean.fq.gz,SRR1228254_2.second.clean.fq.gz
SRR1228255_1.fastq.gz,SRR1228255_2.fastq.gz,SRR1228255_1.fq,SRR1228255_2.fq,SRR1228255_1.second.clean.fq.gz,SRR1228255_2.second.clean.fq.gz
SRR1228256_1.fastq.gz,SRR1228256_2.fastq.gz,SRR1228256_1.fq,SRR1228256_2.fq,SRR1228256_1.second.clean.fq.gz,SRR1228256_2.second.clean.fq.gz
SRR1228257_1.fastq.gz,SRR1228257_2.fastq.gz,SRR1228257_1.fq,SRR1228257_2.fq,SRR1228257_1.second.clean.fq.gz,SRR1228257_2.second.clean.fq.gz
SRR1228258_1.fastq.gz,SRR1228258_2.fastq.gz,SRR1228258_1.fq,SRR1228258_2.fq,SRR1228258_1.second.clean.fq.gz,SRR1228258_2.second.clean.fq.gz
SRR1228259_1.fastq.gz,SRR1228259_2.fastq.gz,SRR1228259_1.fq,SRR1228259_2.fq,SRR1228259_1.second.clean.fq.gz,SRR1228259_2.second.clean.fq.gz
SRR1228260_1.fastq.gz,SRR1228260_2.fastq.gz,SRR1228260_1.fq,SRR1228260_2.fq,SRR1228260_1.second.clean.fq.gz,SRR1228260_2.second.clean.fq.gz
SRR1228264_1.fastq.gz,SRR1228264_2.fastq.gz,SRR1228264_1.fq,SRR1228264_2.fq,SRR1228264_1.second.clean.fq.gz,SRR1228264_2.second.clean.fq.gz

2、去除接头载体,然后与参考基因组比对,比对结果筛选mismatch小于2的reads,并将sam文件转换为bam文件

#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'shengwei ma'
__author_email__ = '[email protected]'


import sh
import subprocess

# Reference files handed to the external tools below.
ADAPTER_TABLE = '/data1/masw/adapterremoval-2.1.7/benchmark/adapters/mixed.table'
HISAT2_INDEX = '/data2/Fshare/IWGSCv1.0_hisat2/IWGSCv1.0_hiast2'
KNOWN_SPLICE_SITES = '/data2/Fshare/IWGSCv1.0_hisat2/TGACv1.ss'
NOVEL_SPLICE_SITES = '/data2/masw_data/rna_seq/all.ss'

# Optional SAM tags accepted by the "fewer than 2 mismatches" filter.
LOW_MISMATCH_TAGS = ('NM:i:0', 'NM:i:1')


def is_header_or_low_mismatch(line):
    """Return True if a SAM line should be kept by the mismatch filter.

    Header lines (starting with '@') are always kept.  An alignment line is
    kept only when one of its optional fields is exactly ``NM:i:0`` or
    ``NM:i:1``.  Matching whole tab-separated fields (instead of substrings,
    as ``grep "@|NM:i:0|NM:i:1"`` did) means that ``NM:i:10`` .. ``NM:i:19``
    and reads whose quality string merely contains '@' are no longer kept.
    """
    if line.startswith('@'):
        return True
    fields = line.rstrip('\r\n').split('\t')
    # The first 11 columns of an alignment line are mandatory; tags follow.
    return any(tag in LOW_MISMATCH_TAGS for tag in fields[11:])


def sam_name_for(fq1):
    """Return the SAM file name for a trimmed read-1 file.

    The last 5 characters (e.g. ``_1.fq``) are dropped and ``.sam`` appended,
    so ``SRR1228245_1.fq`` becomes ``SRR1228245.sam``.
    """
    return fq1[:-5] + '.sam'


def read_samples(path):
    """Read the comma-separated sample sheet and return a list of 6-field rows.

    Blank lines are skipped.  Raises ValueError when a non-blank line has
    fewer than 6 fields.
    """
    samples = []
    with open(path, 'r') as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue
            fields = line.split(',')
            if len(fields) < 6:
                raise ValueError('expected 6 comma-separated fields, got: %r' % line)
            samples.append(fields[:6])
    return samples


def trim_adapters(fq1, fq2, out1, out2):
    """Run AdapterRemoval on one read pair, writing the trimmed pair to out1/out2."""
    sh.AdapterRemoval('--file1', fq1, '--file2', fq2, '--qualitybase', '33', '--trimns',
                      '--minlength', '40', '--threads', '10', '--adapter-list',
                      ADAPTER_TABLE, '--output1', out1, '--output2', out2)


def align_pair(fq1, fq2):
    """Align one trimmed read pair with hisat2 and return the SAM file name.

    Raises subprocess.CalledProcessError when hisat2 exits with a non-zero
    status, so a failed alignment is not silently passed downstream.
    """
    sam = sam_name_for(fq1)
    subprocess.check_call(
        ['hisat2', '-p', '20', '--dta', '-x', HISAT2_INDEX,
         '--known-splicesite-infile', KNOWN_SPLICE_SITES,
         '--novel-splicesite-infile', NOVEL_SPLICE_SITES,
         '-t', '-1', fq1, '-2', fq2, '-S', sam], shell=False)
    return sam


def filter_and_convert(sam):
    """Write the low-mismatch subset of a SAM file and convert it to BAM.

    For ``X.sam`` this produces ``X.less_than_2_mismatch.sam`` and
    ``X.less_than_2_mismatch.bam``.  The filtering is done in Python, so no
    shell command is assembled from file names.
    """
    prefix = sam[:-3]  # keeps the trailing dot of "X."
    filtered_sam = prefix + 'less_than_2_mismatch.sam'
    filtered_bam = prefix + 'less_than_2_mismatch.bam'
    with open(sam, 'r') as src, open(filtered_sam, 'w') as dst:
        for line in src:
            if is_header_or_low_mismatch(line):
                dst.write(line)
    subprocess.check_call(['samtools', 'view', '-b', filtered_sam, '-o', filtered_bam],
                          shell=False)


def main():
    """Trim every sample in input.txt, then align all of them, then filter/convert."""
    trimmed_pairs = []
    for fq1, fq2, fq1_1, fq2_1, _fq1_2, _fq2_2 in read_samples('input.txt'):
        trim_adapters(fq1, fq2, fq1_1, fq2_1)
        print('%s %s' % (fq1_1, fq2_1))
        trimmed_pairs.append((fq1_1, fq2_1))

    sam_files = []
    for fq1, fq2 in trimmed_pairs:
        sam = align_pair(fq1, fq2)
        print(sam)
        sam_files.append(sam)

    for sam in sam_files:
        print(sam)
        filter_and_convert(sam)


if __name__ == '__main__':
    main()

获得bam文件之后,使用featureCounts统计原始reads数,输入的文件是gtf格式的基因组注释文件。这个基因组的gtf文件生成过程是:使用gmap软件将基因mapping到基因组上,生成gff3格式的文件;将gff3格式的转换成gtf文件即可。转换的脚本见下一篇博文 gff3 to gtf.

# Count reads per transcript_id (-g) over the exon features (-t) of the GTF annotation (-a)
# for every filtered BAM file listed; the count table is written to the file given by -o.
featureCounts -T 20 -t exon -g transcript_id --readExtension5 70  --readExtension3 70 -p -O --donotsort -C -a /data2/masw_data/transcript/TGACv1.cdna.reformat.gtf -o TGACv1.rust.unique_count.txt SRR1228254.less_than_2_mismatch.bam SRR1228255.less_than_2_mismatch.bam SRR1228256.less_than_2_mismatch.bam SRR1228245.less_than_2_mismatch.bam SRR1228246.less_than_2_mismatch.bam SRR1228247.less_than_2_mismatch.bam SRR1228248.less_than_2_mismatch.bam SRR1228249.less_than_2_mismatch.bam SRR1228250.less_than_2_mismatch.bam SRR1228251.less_than_2_mismatch.bam SRR1228252.less_than_2_mismatch.bam SRR1228253.less_than_2_mismatch.bam SRR1228257.less_than_2_mismatch.bam SRR1228258.less_than_2_mismatch.bam SRR1228259.less_than_2_mismatch.bam SRR1228260.less_than_2_mismatch.bam SRR1228261.less_than_2_mismatch.bam SRR1228262.less_than_2_mismatch.bam SRR1228263.less_than_2_mismatch.bam SRR1228264.less_than_2_mismatch.bam SRR1228265.less_than_2_mismatch.bam

有了原始的count,再结合每个sample的总mapping reads数,用下面的脚本就可以算出每个基因的FPKM值(FPKM = count × 10^9 / (基因长度 × 该sample的总mapping reads数))。这个脚本同时算出了3个重复的平均值以及重复间的标准差。得到每个sample的FPKM值之后,就可以判断基因是否差异表达。p-value的计算没有包括在这个脚本中:因为我只是想看某一个基因的表达情况,所以调出这个基因之后,用excel简单地算一下是否差异表达即可。

#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'shengwei ma'
__author_email__ = '[email protected]'

import numpy as np

# Leading annotation columns of every row in the count table.
ANNOTATION_COLUMNS = ['Geneid', 'Chr', 'Start', 'End', 'Strand', 'Length']

# Experimental groups, in the same order as the samples in raw_total.
GROUPS = ['Bgt_0dpi', 'Pst_24dpi', 'Pst_48dpi', 'Pst_72dpi', 'Bgt_24dpi', 'Bgt_48dpi', 'Bgt_72dpi']
REPLICATES_PER_GROUP = 3

raw_total = [('Bgt_0dpi_repeatI', 49168553), ('Bgt_0dpi_repeatII', 44047402), ('Bgt_0dpi_repeatIII', 78098556),
             ('Pst_24dpi_repeatI', 38474362), ('Pst_24dpi_repeatII', 79981030), ('Pst_24dpi_repeatIII', 41041508),
             ('Pst_48dpi_repeatI', 46935246), ('Pst_48dpi_repeatII', 38803969), ('Pst_48dpi_repeatIII', 51627704),
             ('Pst_72dpi_repeatI', 37219517), ('Pst_72dpi_repeatII', 39849949), ('Pst_72dpi_repeatIII', 40299574),
             ('Bgt_24dpi_repeatI', 38168988), ('Bgt_24dpi_repeatII', 43073693), ('Bgt_24dpi_repeatIII', 44071613),
             ('Bgt_48dpi_repeatI', 40380776), ('Bgt_48dpi_repeatII', 32810256), ('Bgt_48dpi_repeatIII', 35749803),
             ('Bgt_72dpi_repeatI', 46203474), ('Bgt_72dpi_repeatII', 43612313), ('Bgt_72dpi_repeatIII', 40406588),
             ]  # the number is the total count of mapped reads of each sample


def fpkm(count, length, total_mapped):
    """Return count * 10^9 / (length * total_mapped) as a float.

    All three arguments are passed through int(), so numeric strings read
    from the count table are accepted.  A length or total of 0 raises
    ZeroDivisionError.
    """
    return int(count) * pow(10.0, 9) / (int(length) * int(total_mapped))


def sample_fpkms(counts, length):
    """Return the FPKM of every sample, in raw_total order.

    Every sample uses the same 10^9 scaling; previously the 18th sample
    (Bgt_48dpi_repeatIII) was scaled by 10^6 and came out 1000x too small.
    Raises ValueError when the number of counts differs from the number of
    samples in raw_total.
    """
    if len(counts) != len(raw_total):
        raise ValueError('expected %d sample counts, got %d' % (len(raw_total), len(counts)))
    return [fpkm(count, length, total) for count, (_name, total) in zip(counts, raw_total)]


def group_stats(values):
    """Return (means, stds) over consecutive groups of REPLICATES_PER_GROUP values.

    np.std is called with its default arguments, as in the original script.
    """
    means = []
    stds = []
    for start in range(0, len(values), REPLICATES_PER_GROUP):
        replicates = np.array(values[start:start + REPLICATES_PER_GROUP])
        means.append(np.mean(replicates))
        stds.append(np.std(replicates))
    return means, stds


def tab_join(values):
    """Format every value with %s and join the results with tabs."""
    return '\t'.join('%s' % value for value in values)


def main():
    """Convert raw counts to FPKM.

    Reads MLJ_unique_expression.txt, writes the per-sample FPKM table to
    rust_all_sample_fpkm.txt and prints the per-group mean and standard
    deviation to stdout.  Every record written to the file now ends with a
    newline; previously header and rows were concatenated on a single line.
    """
    expected_fields = len(ANNOTATION_COLUMNS) + len(raw_total)
    with open('rust_all_sample_fpkm.txt', 'w') as all_fpkm:
        all_fpkm.write(tab_join(ANNOTATION_COLUMNS + ['new_' + name for name, _total in raw_total]) + '\n')
        with open('MLJ_unique_expression.txt', 'r') as f:
            print(tab_join(ANNOTATION_COLUMNS + GROUPS + [group + '_std' for group in GROUPS]))
            for line in f:
                # Skip comment lines and the column-header line of the count table.
                if line.startswith(('#', 'Geneid')):
                    continue
                stripped = line.strip()
                if not stripped:
                    continue
                fields = stripped.split('\t')
                if len(fields) != expected_fields:
                    raise ValueError('expected %d tab-separated fields, got %d: %r'
                                     % (expected_fields, len(fields), stripped))
                annotation = fields[:len(ANNOTATION_COLUMNS)]
                length = annotation[-1]
                values = sample_fpkms(fields[len(ANNOTATION_COLUMNS):], length)
                means, stds = group_stats(values)
                print(tab_join(annotation + means + stds))
                all_fpkm.write(tab_join(annotation + values) + '\n')


if __name__ == '__main__':
    main()

你可能感兴趣的:(生物信息,python)