样品表达量的相关性系数计算及画图

在刚开始学的时候不会用R来计算相关性系数,也不会画图,结果博主很悲催地用perl的svg进行画图,很久之前的作品:

#!/usr/bin/env perl
# Draw a grid of scatter-plot panels as SVG, one panel per line of a list file.
#
# Usage: perl <this script> <list_file> > out.svg
#
# Each non-blank line of <list_file> is split on whitespace: the first field is
# the path of a data file, and the first two fields together form the panel
# caption. Every data-file line containing two tab-separated fields is drawn as
# one point. The SVG text is assembled with the site-local PLOT module and
# printed to STDOUT.
use warnings;
use strict;
use lib "/bin/svg_lib";
use PLOT qw(Paper End Point Rect Line Text Polyline Path);

die "Usage: perl $0 <list_file>\n" unless @ARGV == 1;
my $xml = '';
$xml .= Paper();
my ($init_x, $init_y) = (20, 20); # origin of the first panel
my $point_size = 1; # side length of the square drawn for each point
my ($i, $j) = (0, 0); # $i scales the x offset of a panel, $j scales the y offset
my $size = 120; # side length of each panel's border square
my $interval = 80; # offset between the origins of neighbouring panels
my $total_rect = 5; # value of $i at which the layout moves to the next row

open(my $list_fh, '<', $ARGV[0]) or die "Cannot open $ARGV[0]: $!\n";
while (my $entry = <$list_fh>)
{
	chomp $entry;
	my @tmp = split ' ', $entry;
	next unless @tmp; # a blank line names no data file
	open(my $data_fh, '<', $tmp[0]) or die "Cannot open $tmp[0]: $!\n";
	# Row wrap: the next row starts at column $j, so each row begins one
	# column further right than the previous one.
	if ($i >= $total_rect)
	{
		$j++;
		$i = $j;
	}
	my $panel_x = $init_x + $i * $interval;
	my $panel_y = $init_y + $j * $interval;
	$xml .= Rect($panel_x, $panel_y, $size, $size, 2, "black", "white"); # panel border
	while (my $record = <$data_fh>)
	{
		chomp $record;
		# Only lines with two tab-separated fields carry a point.
		if (my ($x, $y) = $record =~ /(\S+)\t(\S+)/)
		{
			# Shift by 1.4 and scale by 20 to get pixel offsets inside the panel.
			# NOTE(review): presumably the inputs are log-scaled values >= -1.4 - confirm.
			$x = int( 20 * (1.4 + $x));
			$y = int( 20 * (1.4 + $y));
			# y is subtracted from the panel's bottom edge so larger values plot higher.
			$xml .= Rect($panel_x + $x, $panel_y + $size - $y, $point_size, $point_size, 1, "red", "red");
		}
	}
	close $data_fh;
	my $str = "$tmp[0] $tmp[1]";
	$xml .= Text($str, $panel_x, $panel_y + $size + 15, 20, 1, 0, "black", "black"); # caption below the panel
	$i++;
}
close $list_fh;
$xml .= End();
print $xml;

SVG包:点击打开链接

学了R之后才发现原来相关性系数这么简单- -!

#!/usr/bin/env perl
# Compute the Pearson correlation between two samples' values and plot them with R.
#
# Usage: perl <this script> <sample1.fpkm> <sample2.fpkm>
#
# Both inputs are whitespace-separated tables; the first field of a line is the
# key and the second is the value. Keys present in both files are written to
# "<id1>_<id2>.coe" as tab-separated "key, value from file 1, value from file 2",
# where each id is the run of word characters immediately before ".fpkm" in the
# file name. R then reads that table, prints the coefficient and writes a
# log-log scatter plot to "<id1>_<id2>.coe.pdf".
use warnings;
use strict;

# Numeric comparison: the check is on the argument count.
die "Usage: perl $0 <sample1.fpkm> <sample2.fpkm>\n" unless @ARGV == 2;

my $in1 = shift;
my $in2 = shift;
my ($id1) = $in1 =~ /(\w+)\.fpkm/;
my ($id2) = $in2 =~ /(\w+)\.fpkm/;
die "Input file names must contain <id>.fpkm\n"
	unless defined $id1 && defined $id2;
my $out = "${id1}_$id2.coe";

# Values of the first sample, keyed by the first column.
my %hash;
open(my $in1_fh, '<', $in1) or die "Cannot open $in1: $!\n";
while (my $line = <$in1_fh>)
{
	chomp $line;
	my @tmp = split ' ', $line;
	next unless @tmp >= 2; # skip blank or incomplete lines
	$hash{$tmp[0]} = $tmp[1];
}
close $in1_fh;

# Write every key shared by both samples and count them.
my $share = 0;
open(my $in2_fh, '<', $in2) or die "Cannot open $in2: $!\n";
open(my $out_fh, '>', $out) or die "Cannot write $out: $!\n";
while (my $line = <$in2_fh>)
{
	chomp $line;
	my @tmp = split ' ', $line;
	next unless @tmp >= 2; # skip blank or incomplete lines
	if (exists $hash{$tmp[0]})
	{
		print {$out_fh} "$tmp[0]\t$hash{$tmp[0]}\t$tmp[1]\n";
		$share++;
	}
}
close $in2_fh;
# The table must be flushed to disk before R reads it.
close $out_fh or die "Cannot close $out: $!\n";

die "No shared ids between $in1 and $in2\n" unless $share;

# List-form pipe open: no shell is involved, and "or" makes the check apply to open().
open(my $r_fh, '|-', '/opt/blc/genome/biosoft/R/bin/R', '--no-save', '-q')
	or die "Cannot start R: $!\n";
print {$r_fh} <<"RCODE";
ma <- read.table("$out", sep = "\\t")
coe <- cor(ma\$V2, ma\$V3, method = c("pearson"))
result <- paste("${id1}_$id2", coe)
result
pdf("$out.pdf")
plot(ma\$V2, ma\$V3, log = 'xy' , xlab = "$id1", ylab = "$id2", xlim = c(0.1,100000) , ylim = c(0.1,100000) , col = "red", type = "p", pch = ".")
legend(0.1, 100000, c(paste("Pearson=", coe), paste("ShareGene=", $share)))
dev.off()
RCODE
# close() on a pipe waits for R; $! is set for I/O errors, otherwise $? holds R's status.
close $r_fh
	or die($! ? "Error closing pipe to R: $!\n" : "R exited with status " . ($? >> 8) . "\n");

让各位大牛见笑了~~~

你可能感兴趣的:(生物信息学)