利用perl脚本统计标记的信息

输入数据格式为:


输入文件示例

利用该脚本,可以获得标记的等位信息、杂合率、MAF、缺失率及PIC值。

perl stat_info_marker.pl example.txt
#!/usr/bin/perl

use warnings;
use strict;

open IN,"$ARGV[0]";
open OUT,">marker-info-$ARGV[0]";
print OUT "Name\tAlleles\tHeterogeneity\tMAF\tMissing\tPIC\n";
;
while () {
    chomp;
    my @a = split/\s+/;
    print OUT "$a[0]\t",stat_alleles(@a[1..$#a]),"\t",stat_het(@a[1..$#a]),"\t",stat_maf(@a[1..$#a]),"\t",stat_missing(@a[1..$#a]),"\t",stat_pic(@a[1..$#a]),"\n";

}
close IN;
close OUT;

sub stat_alleles {
    my %alleles = ();
    foreach  (@_) {
        s/://;
        /(\w)(\w)/;
        next if $1 =~ /N/;
        $alleles{$1}++;
        $alleles{$2}++;
    }
    return join("/",sort keys %alleles);
}


sub stat_missing {
    my $a = 0;
    my $b = 0;
    foreach  (@_) {
        s/://;
        /(\w)(\w)/;
        if ($1 =~ /N/) {
            $a++;
        }
        $b++;
    }
    if ($b > 0) {
        return $a/$b;
    }else{
        return "NA";
    }
}

sub stat_maf {
    my $t = 0;
    my %maf = ();
    foreach  (@_) {
        s/://;
        /(\w)(\w)/;
        if ($1 =~ /N/) {
            next;
        } else{
            $maf{$1}++;
            $maf{$2}++;
            $t += 2;
        }
    }
    my @return = sort{$maf{$b}<=>$maf{$a}}keys %maf;
    if ($#return >= 1 && $t > 0) {
        return $maf{$return[1]}/$t;
    }else{
        return "NA";
    }
}

sub stat_pic {
    my $a = 0;
    my $b = 0;
    my %pic = ();
    foreach  (@_) {
        s/://;
        /(\w)(\w)/;
        if ($1 =~ /N/) {
            next;
        } else{
            $pic{$1}++;
            $pic{$2}++;
            $a += 2;
        }
    }
    if ($a > 0) {
        foreach  (values %pic) {
            $b += (($_/$a)*($_/$a));
        }
        return 1-$b;
    } else{
        return "NA";
    }
}


sub stat_het {
    my $a = 0;
    my $b = 0;
    foreach  (@_) {
        s/://;
        /(\w)(\w)/;
        if ($1 =~ /N/) {
            next;
        } elsif ($1 ne $2) {
            $a++;
        } else {
            $b++;
        }
    }
    if ($a+$b > 0) {
        return $a/($a+$b);
    }else{
        return "NA";
    }
}


你可能感兴趣的:(利用perl脚本统计标记的信息)