08--Correlation Analysis to Validate Results

Purpose:

To validate the cell-type enrichment results from xCell, perform a correlation analysis to examine the trend between the predicted proportion of each cell type and the measured count of each cell type.

1. Data preprocessing

  • Transpose the rows and columns of the xCell results
# Read the tab-separated xCell output (with a header row), transpose it, and
# write the transposed table back out tab-separated. Row names are written;
# the column-header line is suppressed (col.names = FALSE).
a <- read.table(
  "xCell__xCell_0412052318.txt",
  header = TRUE,
  sep = "\t"
)
b <- t(a)
write.table(
  b,
  file = "xCell_res2",
  quote = FALSE,
  sep = "\t",
  col.names = FALSE
)
  • Combine the xCell results with the bloodCounts data to make a new matrix
# Join the blood-count table and the transposed xCell table side by side;
# paste separates the two files' lines with a tab.
paste bloodCounts_2 xCell_res2 > cor_input
# Blank out column 39 (presumably the sample-ID column repeated from
# xCell_res2 -- confirm). FS and OFS are both set to tab so awk splits on tabs
# and rebuilds the record with tabs; with the default OFS the rewritten line
# would be space-separated and could not be read back with sep="\t".
awk 'BEGIN { FS = OFS = "\t" } { $39 = ""; print $0 }' cor_input > cor_input_2
# Collapse the doubled tab left where column 39 was emptied.
# NOTE(review): this also collapses any other empty field in the table --
# confirm the inputs contain no missing values encoded as empty cells.
sed 's/\t\t/\t/g' cor_input_2 > cor_input_3

2. Correlation analysis using cor function or rcorr function in Hmisc package

  • Load data
    • all data of cell type
# Switch to the project directory and load the combined tab-separated table.
# The first line is the header and the first column supplies the row names.
setwd("F:/sdc_project")
mydata <- read.table(
  file = "cor_input_3",
  header = TRUE,
  sep = "\t",
  row.names = 1
)
> mydata[1:4,1:8]
      BAS   BA CCMH EOS   EO  HB  HCM   HE
31201 0.4 0.02  345 2.9 0.15 162 32.9 4.93
31202 0.3 0.02  315 1.5 0.10 130 26.3 4.95
31303 0.6 0.03  327 1.9 0.09 148 27.4 5.40
31304 0.3 0.02  330 2.0 0.12 146 29.7 4.92
> dim(mydata)
[1] 915 104
  • Cell types in bloodCounts related to the xCell results
# Keep only the 19 columns compared against each other: the blood-count
# measurements (BAS ... RETAB) followed by the matching xCell scores
# (Basophils ... Platelets).
part_data <- mydata[, c(
  1, 2, 4, 5, 8, 11, 16, 17, 18, 19, 20, 23, 24,  # blood-count columns
  41, 57, 59, 76, 81, 85                          # xCell columns
)]
> part_data[1:4,]
      BAS   BA EOS   EO   HE   LE  MON  MOt  NEU   NE PTES RETIS  RETAB Basophils
31201 0.4 0.02 2.9 0.15 4.93 5.16 10.3 0.53 60.2 3.11  163  1.31 0.0646    0.6411
31202 0.3 0.02 1.5 0.10 4.95 6.85  6.6 0.45 62.0 4.25  156  1.50 0.0743    0.1741
31303 0.6 0.03 1.9 0.09 5.40 4.77  6.3 0.30 59.1 2.82  201  0.96 0.0518    0.6074
31304 0.3 0.02 2.0 0.12 4.92 5.89  6.6 0.39 50.7 2.98  214  1.08 0.0531    0.0312
      Eosinophils Erythrocytes Monocytes Neutrophils Platelets
31201      0.0918       0.1071    0.1476      0.0000    0.1022
31202      0.3104       0.1849    0.0276      0.1492    0.2070
31303      0.1864       0.0817    0.0000      0.0745    0.2708
31304      0.0000       0.0000    0.0000      0.0000    0.1819
> dim(part_data)
[1] 915  19
  • Calculate the correlation coefficient matrix
    • use cor function
# (1) Correlation matrices via cor(), rounded to two decimal places.
# Full table: show the first four rows for columns 39 to 49.
res <- round(cor(mydata), digits = 2)
res[1:4, 39:49]
# Selected columns only: show the first four rows.
part_res <- round(cor(part_data), digits = 2)
part_res[1:4, ]
  • use rcorr() function
    cor() can only calculate correlation coefficients; it does not provide significance p-values. rcorr() in the Hmisc package calculates both the correlation coefficients and their p-values.
    Usage: rcorr(x, type = c("pearson", "spearman"))
# Install and load Hmisc, which provides rcorr().
# NOTE(review): biocLite() is the retired Bioconductor installer and Hmisc is
# distributed on CRAN; consider install.packages("Hmisc") once the later
# biocLite() calls in this script are migrated as well.
source("http://bioconductor.org/biocLite.R")
biocLite("Hmisc")
library(Hmisc)

# rcorr() works on a matrix, so convert the data frame first.
# Full table:
res2 <- rcorr(as.matrix(mydata))
res2
# Selected columns (part_data):
part_res2 <- rcorr(as.matrix(part_data))
part_res2

# The r element holds the correlation coefficients and P the p-values.
res2[["r"]]
res2[["P"]]
# Same for part_data:
part_res2[["r"]]
part_res2[["P"]]
> part_res2
               BAS    BA   EOS    EO    HE    LE   MON   MOt   NEU    NE  PTES RETIS
BAS           1.00  0.89  0.22  0.15 -0.09 -0.19  0.06 -0.10 -0.19 -0.23  0.01 -0.05
BA            0.89  1.00  0.23  0.29 -0.04  0.22 -0.03  0.17 -0.06  0.12  0.19  0.00
EOS           0.22  0.23  1.00  0.93  0.01 -0.03  0.01 -0.02 -0.43 -0.22  0.09 -0.19
EO            0.15  0.29  0.93  1.00  0.03  0.23 -0.06  0.14 -0.32  0.01  0.20 -0.14
HE           -0.09 -0.04  0.01  0.03  1.00  0.09  0.07  0.14 -0.03  0.05 -0.17 -0.02
LE           -0.19  0.22 -0.03  0.23  0.09  1.00 -0.25  0.64  0.38  0.91  0.37  0.07
MON           0.06 -0.03  0.01 -0.06  0.07 -0.25  1.00  0.54 -0.32 -0.31 -0.17  0.02
MOt          -0.10  0.17 -0.02  0.14  0.14  0.64  0.54  1.00  0.07  0.51  0.20  0.09
NEU          -0.19 -0.06 -0.43 -0.32 -0.03  0.38 -0.32  0.07  1.00  0.71 -0.06  0.15
NE           -0.23  0.12 -0.22  0.01  0.05  0.91 -0.31  0.51  0.71  1.00  0.23  0.10
PTES          0.01  0.19  0.09  0.20 -0.17  0.37 -0.17  0.20 -0.06  0.23  1.00 -0.08
RETIS        -0.05  0.00 -0.19 -0.14 -0.02  0.07  0.02  0.09  0.15  0.10 -0.08  1.00
RETAB        -0.07  0.00 -0.18 -0.11  0.27  0.10  0.04  0.13  0.14  0.11 -0.12  0.95
Basophils     0.01 -0.02 -0.04 -0.07  0.12 -0.08  0.21  0.10  0.02 -0.05 -0.16 -0.12

3. Visualization

# Symbolic (character-coded) view of the rounded correlation matrices;
# abbr.colnames = FALSE keeps the full column names.
symnum(res, abbr.colnames = FALSE)
symnum(part_res, abbr.colnames = FALSE)

# corrplot is a CRAN package: install it with install.packages() instead of
# the retired Bioconductor biocLite() installer, and only when it is missing.
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot")
}
library(corrplot)

# Upper-triangle correlograms with black labels rotated by 45 degrees.
corrplot(res, type = "upper", tl.col = "black", tl.srt = 45)
corrplot(part_res, type = "upper", tl.col = "black", tl.srt = 45)
08--Correlation Analysis to Validate Results_第1张图片
part_res
  • Combine correlation coefficients and p-values
# Combine coefficients with p-values: correlations that are not significant
# at sig.level are left blank.
corrplot(res2$r, type = "upper", p.mat = res2$P, sig.level = 0.01,
         tl.col = "black", insig = "blank")

# The p-value matrix must come from the same rcorr() result as the plotted
# coefficients, so the part_data plots use part_res2$P (not res2$P, which
# belongs to the full mydata correlation matrix).
corrplot(part_res2$r, order = "hclust", type = "upper", p.mat = part_res2$P,
         sig.level = 0.05, tl.col = "black", insig = "blank")
corrplot.mixed(part_res2$r, order = "hclust", p.mat = part_res2$P,
               sig.level = 0.05, tl.col = "black", insig = "blank")
08--Correlation Analysis to Validate Results_第2张图片
part_res2_r_mixplot
  • Use chart.Correlation(): Draw scatter plots
    Use chart.Correlation() in PerformanceAnalytics package
# PerformanceAnalytics is a CRAN package: install it with install.packages()
# instead of the retired Bioconductor biocLite() installer, and only when it
# is missing.
if (!requireNamespace("PerformanceAnalytics", quietly = TRUE)) {
  install.packages("PerformanceAnalytics")
}
library(PerformanceAnalytics)  # load the package

# Scatter-plot matrices with histograms, for the full table and for the
# selected columns.
chart.Correlation(mydata, histogram = TRUE, pch = 19)
chart.Correlation(part_data, histogram = TRUE, pch = 19)
08--Correlation Analysis to Validate Results_第3张图片
image.png
  • heatmap
# 20-step blue -> white -> red palette for the correlation heatmaps.
col <- colorRampPalette(c("blue", "white", "red"))(20)
# Correlation matrices are symmetric, so symm = TRUE is passed to both calls
# (the second call previously misspelled the argument as `symn`).
heatmap(x = res2$r, col = col, symm = TRUE)
heatmap(x = part_res2$r, col = col, symm = TRUE)
08--Correlation Analysis to Validate Results_第4张图片
part_data_resheatmap

你可能感兴趣的:(08--Correlation Analysis to Validate Results)