处理由Popgen32的Genetic Distance选项生成的数据

这两个脚本是相关连的。可以执行脚本1后再执行脚本2。

第三个脚本将脚本2生成的某文件变为对称矩阵。

文件名神马的当然得改了,程序友好度不佳

脚本1:

 dist_ident_1.pl

#!perl -w
# 这是以前写的代码,tab不改空格了……
#貌似运行成功
#分开Nei 1972或1978的数据的 genetic distance 与 genetic identity 表
#数据是Popgene32算出的
use strict;
my $rst = 'pop57'; # 某文件,后缀'rst'就不用改了
#$rst =~ s/\..*$//;
genetic_nei($rst);

###================================================
# 子程序
###================================================

# 分开Nei 1972和1978的数据
sub genetic_nei {
	foreach (@_) {
		my $in = $_;
		my $out = "$in" . "_1972.txt";
		my $outi = "$in" . "_1978.txt";
		open IN, "<", "$in" . ".rst" or die "Can't open '$in': $!";
		open OUT, ">", "$out" or die "Can't write to '$out': $!";
		open OUTI, ">", "$outi" or die "Can't write to '$outi': $!";
		my $n = 0;
		my $m = undef;
		while(<IN>) {
		  
		  print OUT "$1\n\n" if /(See Nei.*292)/;
		  print OUTI "$1\n\n" if /(See Nei.*590)/;
		  if(/^\d+/) {
		     s/^\d+\s+//; # 去掉开头的编号
			 s/ *$//;     # 去掉末尾的空格
			 s/ +/\t/g;   # 把多个空格换成一个制表符
			 $m = (split) unless (defined $m);
			 if($n < $m) {
			   print OUT;
			 } else {
			   print OUTI;
			 }
			 $n++;
		  }
		}
		close IN;
		close OUT;
		close OUTI;
		divide($out, $outi);
	}
}

###================================================
# 分开两组数据
sub divide {
	foreach ( @_ ) {
		my $in = $_;
		my $out = $in;
		my $outi = $in;
		$out =~ s/^/identity_/;  # 特征值
		$outi =~ s/^/dist_/;     # 遗传距离
		open IN, "<$in" or die "Can't open '$in': $!";
		open OUT, ">", "$out" or die "Can't write to '$out': $!";
		open OUTI, ">", "$outi" or die "Can't write to '$outi': $!";

		while(<IN>) {
		  if (/\*{4}/) {
			my $m = $`;         # 遗传距离
			my $n = $';         # 特征值
			print OUTI "$m" . "0\n"; # 把“****”替换为“0”
			$m =~ s/[^\s]+/1/g;      # 把遗传距离表替换为“1”
			print OUT "$m" . "1" ."$n"; # 之前的“****”以“1”代替
		  }
		}
		close IN;
		close OUT;
		close OUTI;
	}
}



 脚本2:

 dist_ident_2.pl

#!perl -w
#貌似运行成功
#Nei 1972和1978的数据最大最小值
#数据是Popgene32算出的
use strict;

# 从前是 @ARGV,今 @hehe
my @hehe = qw(pop57);

my @dist;
my @ident;
my ($d_1972, $d_1978, $i_1972, $i_1978);
foreach ( @hehe ) {
  $d_1972 = "dist_" . $_ . "_1972.txt";
  $d_1978 = "dist_" . $_ . "_1978.txt";
  $i_1972 = "identity_" . $_ . "_1972.txt";
  $i_1978 = "identity_" . $_ . "_1978.txt";
  push @dist, $d_1972;
  push @dist, $d_1978;
  push @ident, $i_1972;
  push @ident, $i_1978;
}
max_dist(@dist);
min_ident(@ident);
print "Done!\n";

#####
sub max {
  my $max_so_far = shift @_;
  foreach ( @_ ) {     
	 if ($_ > $max_so_far) {
	     $max_so_far = $_;		 
	 }
  }
  $max_so_far;
 }
 #####
 sub min {
  my $min_so_far = shift @_;
  foreach ( @_ ) {     
	 if ($_ < $min_so_far) {
	     $min_so_far = $_;		 
	 }
  }
  $min_so_far;
 }
#####
# 求数据的最大值或最小值

sub max_dist {
  foreach ( @_ ) {
    my $in = $_;
    my $out = $in;
    my @pops;
    open IN, "<$in" or die "Can't open '$in': $!";
    $out =~ s/\.txt$//;
    open OUT, '>', "$out" . "_结果.txt" or die "Can't write to '_结果.txt': $!";
    my $try = 0;
    my $i;
    while(<IN>) {
      my @nums = split /\s+/, $_;
      my $num = @nums;
      @pops = (1 .. $num);
      $try = max(@nums, $try);  
    }
    print OUT "\n最大值为:$try\n";
    close IN;
    open IN, "<$in" or die "Can't open '$in': $!"; # 需要重新打开,目前不知道更好的办法

    while(<IN>) {
      $i++; # 第几行
      my $j;
      foreach ( split /\s+/, $_ ) {
        $j++;
        if ($_ == $try) {
	      print OUT $pops[$i - 1], "\t", $pops[$j - 1], "\n"; # 对应的各个坐标
	    }
      }
    }
  }
}

sub min_ident {
  foreach ( @_ ) {
    my $in = $_;
    my $out = $in;
    my @pops;
    open IN, "<$in" or die "Can't open '$in': $!";
    $out =~ s/\.txt$//;
    open OUT, '>', "$out" . "_结果.txt" or die "Can't write to '_结果.txt': $!";
    my $try = 1;
    my $i;
    while(<IN>) {
      my @nums = split /\s+/, $_;
      my $num = @nums;
      @pops = (1 .. $num);
      $try = min(@nums, $try);  
    }
    print OUT "\n最小值为:$try\n";
    close IN;
    open IN, "<$in" or die "Can't open '$in': $!"; # 需要重新打开,目前不知道更好的办法
    while(<IN>) {
      $i++; # 第几行
      my $j;
      foreach ( split /\s+/, $_ ) {
        $j++;
        if ($_ == $try) {
	      print OUT $pops[$i - 1], "\t", $pops[$j - 1], "\n"; # 对应的各个坐标
	    }
      }
    }
  }
}

脚本3:

tri2square.pl

#!perl
use strict;
use warnings;
# 三角阵生对称矩阵

my $in = 'dist_pop57_1978.txt';
my $out = 'square_dist_pop57_1978.txt';

my %dist;

open IN, '<', $in or die "Can't open '$in': $!";
open OUT, '>', $out or die "Can't write to '$out': $!";
my $m = 0;
while (<IN>) {
	$m++;
	my $n = 1;
	for my $x (split) {
		$dist{$m}{$n} = $dist{$n}{$m} = $x;
		$n++;
	}
}
close IN;

for my $i (1 .. $m) {
	for my $j (1 .. ($m-1)) {
		print OUT "$dist{$i}{$j}\t";
	}
	print OUT "$dist{$i}{$m}\n";
}

close OUT;

print "Done!\n";




你可能感兴趣的:(perl,distance,popgene,三角阵生方阵)