Perl 脚本语言练习

1、计算FASTA文件中各种碱基(A,T,C,G和代表未知碱基的N)的数目和百分数。

2、计算FASTA文件所包含序列的scaffold N50长度。

3、计算GFF3文件中所记录的各种类型repeat的总长,以及占FASTA文件序列总长的百分数。


#!/usr/bin/perl -w
#
# Count the bases A, T, C, G and N (unknown base) in ./Test.fasta and print
# each count together with its percentage of all sequence characters.
#
# Input : ./Test.fasta (FASTA; lines starting with '>' are record headers)
# Output: one "... NUM IS ..." line per tally and one "... PERCENT IS ...%"
#         line per base, written to STDOUT.
# Dies  : if the FASTA file cannot be opened.
use strict;
use warnings;

my $fasta_file = "./Test.fasta";

my $num_A     = 0;
my $num_T     = 0;
my $num_C     = 0;
my $num_G     = 0;
my $num_N     = 0;
my $num_total = 0;

# Three-argument open with a lexical handle; report the reason on failure.
open(my $in, '<', $fasta_file) or die "Cannot open $fasta_file: $!\n";

# The loop must actually read from the handle: an empty `while()` condition
# is always true in Perl and would spin forever without consuming the file.
while (my $line = <$in>) {
    # Remove the line terminator explicitly (LF or CRLF) instead of assuming
    # every line ends in exactly one newline character.
    $line =~ s/[\r\n]+\z//;

    # Header lines describe the record; their letters are not bases.
    next if $line =~ /^>/;
    next if $line eq '';

    $num_total += length $line;

    # tr/// with an empty replacement list counts matches without modifying
    # the string. Lowercase letters are counted too, so soft-masked sequence
    # is not silently dropped from the per-base tallies.
    $num_A += ($line =~ tr/Aa//);
    $num_T += ($line =~ tr/Tt//);
    $num_C += ($line =~ tr/Cc//);
    $num_G += ($line =~ tr/Gg//);
    $num_N += ($line =~ tr/Nn//);
}
close($in);

print "TOTAL NUM IS $num_total\n";
print "A NUM IS $num_A\n";
print "T NUM IS $num_T\n";
print "C NUM IS $num_C\n";
print "G NUM IS $num_G\n";
print "N NUM IS $num_N\n";

# Guard against an empty file (or one holding only headers): report 0%
# rather than dying on a division by zero.
my $divisor = $num_total || 1;
print "A PERCENT IS ".($num_A*100/$divisor)."%\n";
print "T PERCENT IS ".($num_T*100/$divisor)."%\n";
print "C PERCENT IS ".($num_C*100/$divisor)."%\n";
print "G PERCENT IS ".($num_G*100/$divisor)."%\n";
print "N PERCENT IS ".($num_N*100/$divisor)."%\n";
exit;






#!/usr/bin/perl -w
#
# Sum the lengths of the repeat features recorded in ./Repeat.gff, grouped by
# repeat class, and print each class total. When ./Test.fasta is readable the
# total is also shown as a percentage of the FASTA sequence length.
#
# Input : ./Repeat.gff  (GFF3: 9 columns; start/end in columns 4 and 5)
#         ./Test.fasta  (optional; only needed for the percentage column)
# Output: one tab-separated line per class on STDOUT:
#             class <TAB> total_length [<TAB> percent%]
# Dies  : if the GFF file cannot be opened.
#
# NOTE(review): the class key is the third ';'-separated entry of the
# attributes column, exactly as in the earlier version of this script -
# confirm that this matches the layout of the GFF being analysed.
# NOTE(review): overlapping features are not merged, so bases covered by
# several features are counted once per feature.
use strict;
use warnings;

my $gff_file   = "./Repeat.gff";
my $fasta_file = "./Test.fasta";

my %length_of;      # class key => summed feature length
my @class_order;    # class keys in order of first appearance

open(my $gff, '<', $gff_file) or die "Cannot open $gff_file: $!\n";

# Read the handle explicitly; an empty `while()` condition never reads
# anything and loops forever.
while (my $line = <$gff>) {
    $line =~ s/[\r\n]+\z//;

    # A "##FASTA" directive starts embedded sequence data: no more features.
    last if $line =~ /^##FASTA/;
    # Skip directives, comments and blank lines.
    next if $line =~ /^#/ or $line =~ /^\s*$/;

    # GFF3 columns are tab-delimited and the attributes column may itself
    # contain spaces, so split on tabs. Lines without any tab fall back to
    # whitespace splitting so space-delimited files are still accepted.
    # The limit of 9 keeps the attributes column in one piece.
    my @fields = ($line =~ /\t/) ? split(/\t/, $line, 9)
                                 : split(' ', $line, 9);
    next if @fields < 9;

    my ($start, $end) = @fields[3, 4];
    next unless $start =~ /^\d+$/ and $end =~ /^\d+$/;

    # Coordinates are 1-based and inclusive, so a feature spanning
    # start..end covers end - start + 1 bases.
    my $feature_length = $end - $start + 1;

    my $class = (split(/;/, $fields[8]))[2];
    next unless defined $class;

    # A hash lookup replaces the linear scan over every class seen so far.
    push(@class_order, $class) unless exists $length_of{$class};
    $length_of{$class} += $feature_length;
}
close($gff);

# Total sequence length of the FASTA file (header lines excluded); stays
# undefined when the file is unavailable so percentages are simply omitted.
my $fasta_total;
if (open(my $fasta, '<', $fasta_file)) {
    $fasta_total = 0;
    while (my $line = <$fasta>) {
        $line =~ s/[\r\n]+\z//;
        next if $line =~ /^>/;
        $fasta_total += length $line;
    }
    close($fasta);
}
else {
    warn "Cannot open $fasta_file ($!); percentages are not reported\n";
}

for my $class (@class_order) {
    my $total = $length_of{$class};
    if ($fasta_total) {
        print "$class\t$total\t".($total*100/$fasta_total)."%\n";
    }
    else {
        print "$class\t$total\n";
    }
}
exit;



#!/usr/bin/perl -w
#
# Compute the scaffold N50 of the sequences in ./Test.fasta.
#
# N50 is the length L such that scaffolds of length >= L together contain at
# least half of all bases: sort the scaffolds from longest to shortest and
# accumulate lengths until the running sum reaches half of the total.
#
# Input : ./Test.fasta (FASTA; a record's sequence may span several lines)
# Output: two lines on STDOUT - the N50 length, then the header line of the
#         scaffold at which the running sum reached half of the total.
# Dies  : if the file cannot be opened or holds no sequence data.
use strict;
use warnings;

my $fasta_file = "./Test.fasta";

open(my $in, '<', $fasta_file) or die "Cannot open $fasta_file: $!\n";

my @scaffolds;    # one [header, length] pair per FASTA record
my $current;      # the pair for the record currently being read
my $total_num = 0;

# Read the handle explicitly; an empty `while()` condition never reads
# anything and loops forever.
while (my $line = <$in>) {
    $line =~ s/[\r\n]+\z//;

    if ($line =~ /^>/) {
        # A header starts a new scaffold.
        $current = [$line, 0];
        push(@scaffolds, $current);
        next;
    }
    next if $line eq '';

    # Sequence that appears before any header still counts as a scaffold.
    unless (defined $current) {
        $current = ['', 0];
        push(@scaffolds, $current);
    }

    # All sequence lines of one record belong to the same scaffold, so add
    # them up instead of treating every line as a separate scaffold.
    my $line_num = length $line;
    $current->[1] += $line_num;
    $total_num    += $line_num;
}
close($in);

die "No sequence data found in $fasta_file\n" unless $total_num > 0;

# Longest first: N50 is defined on the descending cumulative sum.
my @by_length = sort { $b->[1] <=> $a->[1] } @scaffolds;

my $num = 0;
for my $scaffold (@by_length) {
    $num += $scaffold->[1];

    # ">=" because reaching exactly half of the total already satisfies the
    # N50 definition.
    if ($num * 2 >= $total_num) {
        print "$scaffold->[1]\n";
        print "$scaffold->[0]\n";
        last;
    }
}
exit;




你可能感兴趣的:(应用)