use strict; use warnings; my $filename; my @filename; my $i; #氨基酸hash my(%genetic_code) = ( 'TCA' => 'S', # Serine 'TCC' => 'S', # Serine 'TCG' => 'S', # Serine 'TCT' => 'S', # Serine 'TTC' => 'F', # Phenylalanine 'TTT' => 'F', # Phenylalanine 'TTA' => 'L', # Leucine 'TTG' => 'L', # Leucine 'TAC' => 'Y', # Tyrosine 'TAT' => 'Y', # Tyrosine 'TAA' => '_', # Stop 'TAG' => '_', # Stop 'TGC' => 'C', # Cysteine 'TGT' => 'C', # Cysteine 'TGA' => '_', # Stop 'TGG' => 'W', # Tryptophan 'CTA' => 'L', # Leucine 'CTC' => 'L', # Leucine 'CTG' => 'L', # Leucine 'CTT' => 'L', # Leucine 'CCA' => 'P', # Proline 'CCC' => 'P', # Proline 'CCG' => 'P', # Proline 'CCT' => 'P', # Proline 'CAC' => 'H', # Histidine 'CAT' => 'H', # Histidine 'CAA' => 'Q', # Glutamine 'CAG' => 'Q', # Glutamine 'CGA' => 'R', # Arginine 'CGC' => 'R', # Arginine 'CGG' => 'R', # Arginine 'CGT' => 'R', # Arginine 'ATA' => 'I', # Isoleucine 'ATC' => 'I', # Isoleucine 'ATT' => 'I', # Isoleucine 'ATG' => 'M', # Methionine 'ACA' => 'T', # Threonine 'ACC' => 'T', # Threonine 'ACG' => 'T', # Threonine 'ACT' => 'T', # Threonine 'AAC' => 'N', # Asparagine 'AAT' => 'N', # Asparagine 'AAA' => 'K', # Lysine 'AAG' => 'K', # Lysine 'AGC' => 'S', # Serine 'AGT' => 'S', # Serine 'AGA' => 'R', # Arginine 'AGG' => 'R', # Arginine 'GTA' => 'V', # Valine 'GTC' => 'V', # Valine 'GTG' => 'V', # Valine 'GTT' => 'V', # Valine 'GCA' => 'A', # Alanine 'GCC' => 'A', # Alanine 'GCG' => 'A', # Alanine 'GCT' => 'A', # Alanine 'GAC' => 'D', # Aspartic Acid 'GAT' => 'D', # Aspartic Acid 'GAA' => 'E', # Glutamic Acid 'GAG' => 'E', # Glutamic Acid 'GGA' => 'G', # Glycine 'GGC' => 'G', # Glycine 'GGG' => 'G', # Glycine 'GGT' => 'G', # Glycine ); my $jianji_number=0; my %hash; my $DNA; my @DNA; my $jianji; my $first; my $second; my $base; my @information; my $key1; my $key2; my $key3; my $rever_first; my $rever_second; my $yushu; my $position; #一下程序对每一个染色体做一个hash @filename=qw/I II III IV V VI VII VIII IX X XI XII XIII XIV XV XVI/; foreach $filename (@filename) { $jianji_number=0; undef(@DNA); open(IN1,"chr$filename")||die("can not open"); $i=<IN1>; @DNA=<IN1>; $DNA=join('',@DNA); $DNA=~s/\s//g; for($position=0;$position<length $DNA;++$position)#hash中后面一个数字就是碱基的位置。本来少1个,但是我们人为的加上了1 { $base=substr($DNA,$position,1); $hash{$filename}{$position+1}="$base"; } } open(OUT,">TY_and_FTY.txt")||die("can not open"); open(IN,"information_of_tongyi_or_not.txt")||die("can not open"); while(<IN>) { chomp; @information=split/\s+/,$_; if($information[8] eq "+") { $key1=$hash{$information[0]}{$information[4]}; $key2=$hash{$information[0]}{$information[4]+1}; $key3=$hash{$information[0]}{$information[4]+2}; if($key1 eq "A" && $key2 eq "T" && $key3 eq "G") { if($information[7]==0) { $first=join('',$information[2],$hash{$information[0]}{$information[1]+1},$hash{$information[0]}{$information[1]+2}); $second=join('',$information[3],$hash{$information[0]}{$information[1]+1},$hash{$information[0]}{$information[1]+2}); if($genetic_code{$first} eq $genetic_code{$second}) { print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$first} TY0\n"; } else { print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} FTY\n"; } } elsif($information[7]==1) { $first=join('',$hash{$information[0]}{$information[1]-1},$information[2],$hash{$information[0]}{$information[1]+1}); $second=join('',$hash{$information[0]}{$information[1]-1},$information[3],$hash{$information[0]}{$information[1]+1}); if($genetic_code{$first} eq $genetic_code{$second}) { print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} TY\n"; } else { print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} FTY\n"; } } elsif($information[7]==2) { $first=join('',$hash{$information[0]}{$information[1]-2},$hash{$information[0]}{$information[1]-1},$information[2]); $second=join('',$hash{$information[0]}{$information[1]-2},$hash{$information[0]}{$information[1]-1},$information[3]); if($genetic_code{$first} eq $genetic_code{$second}) { print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} TY\n"; } else { print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} FTY\n"; } } } else { next; } } elsif($information[8] eq "-") { $key1=$hash{$information[0]}{$information[5]}; $key2=$hash{$information[0]}{$information[5]-1}; $key3=$hash{$information[0]}{$information[5]-2}; if($key1 eq "T" && $key2 eq "A" && $key3 eq "C")#因为这里是负链,所以对应的也应该是ATC的互补链,下面的翻译过程也要注意,所有的要先对应到正链的位置上,然后在进行翻译。 { print "T\n"; $yushu=($information[5]+1-$information[1])%3; if($yushu==1) #当反向的时候余数为1,那么就是氨基酸的第一位,余数为2,那么是氨基酸的第二位,余数为o为氨基酸的第三位 { $first=join('',$information[2],$hash{$information[0]}{$information[1]-1},$hash{$information[0]}{$information[1]-2}); $second=join('',$information[3],$hash{$information[0]}{$information[1]-1},$hash{$information[0]}{$information[1]-2}); $rever_first=revcom($first); $rever_second=revcom($second); if($genetic_code{$rever_first} eq $genetic_code{$rever_second}) { print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} TY\n"; } else { print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} FTY\n"; } } elsif($yushu==2) { $first=join('',$hash{$information[0]}{$information[1]+1},$information[2],$hash{$information[0]}{$information[1]-1}); $second=join('',$hash{$information[0]}{$information[1]+1},$information[3],$hash{$information[0]}{$information[1]-1}); $rever_first=revcom($first); $rever_second=revcom($second); if($genetic_code{$rever_first} eq $genetic_code{$rever_second}) { print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} TY\n"; } else { print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} FTY\n"; } } elsif($yushu==0) { $first=join('',$hash{$information[0]}{$information[1]+2},$hash{$information[0]}{$information[1]+1},$information[2]); $second=join('',$hash{$information[0]}{$information[1]+2},$hash{$information[0]}{$information[1]+1},$information[3]); $rever_first=revcom($first); $rever_second=revcom($second); if($genetic_code{$rever_first} eq $genetic_code{$rever_second}) { print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} TY\n"; } else { print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} FTY\n"; } } } } else { next; } } #子程序:获得互补序列 sub revcom { # A subroutine to compute the reverse complement of DNA sequence # 一个获取DNA互补序列的子程序 my($dna)=@_; print $dna."\n"; my ($revcom)=reverse($dna); $revcom=~tr/ACGTacgt/TGCAtgca/; return $revcom; }