热图是展示无监督聚类结果的一种可视化方式
第一行:标签无监督聚类的结果状态。绿色为一类,白色为另一类。
第二行: 真实分类状态(金标准)。绿色下的灰色为分类正确的,黑色为分类错误的。白色下的黑色为分类正确的,灰色为分类错误的。
第一列:特征无监督聚类的结果。特征分类如图例所示。
其他部分:每一行为一个特征,每一列为一个例子,特征值由颜色标识。绿色为一类,蓝色为另一类。
如果形成明显的方块,证明具有很好的区分度。
R语言程序:
环境:RStudio
# Switch to the directory that the relative CSV paths below are resolved against.
# NOTE(review): this absolute path is machine-specific; adjust it before running elsewhere.
setwd("E:/func_cluster")
getwd() # show the current working directory
library(limma)
library(gplots)
library(pheatmap)
# Read the four comma-separated input files from the working directory.
# The feature table and the name table are read with a header row;
# the label table and the type table are read without one.
df <- read.table(
  file = "feature690of75_2.csv",
  sep = ",",
  header = TRUE
)
set <- read.table(
  file = "labelof75.csv",
  sep = ",",
  header = FALSE
)
Name <- read.table(
  file = "name3.csv",
  sep = ",",
  header = TRUE
)
Type <- read.table(
  file = "type2.csv",
  sep = ",",
  header = FALSE
)
# Give the type table an explicit column name.
names(Type) <- "Type"
# Min-max normalisation: rescale every row of df to the range [0, 1].
# Row minima and maxima are computed once and reused below.
row_min <- apply(df, 1, min)
row_max <- apply(df, 1, max)
# Subtract each row's minimum from that row.
center <- sweep(df, 1, row_min, "-")
# Per-row range (max - min).
R <- row_max - row_min
# A constant row has range 0; dividing by it would give NaN (0 / 0), which
# would then flow into the model fit and the feature selection. Divide such
# rows by 1 instead so they simply stay at 0 after the subtraction above.
R[which(R == 0)] <- 1
# Divide each min-shifted row by its range.
df <- sweep(center, 1, R, "/")
# Keep only the first column of the label table and shift every code down by one.
set <- set[, 1] - 1
# Use the shifted labels as a categorical predictor.
f <- factor(as.character(set))
design <- model.matrix(~ f)
# Fit the limma linear model against the design, then apply the
# empirical-Bayes step to the fitted object.
fit <- lmFit(df, design)
fit <- eBayes(fit)
# Map a single class label to a hex colour string:
# "1" -> red, "-1" -> blue, anything else -> green.
color.map <- function(cl) {
  if (cl == "1") {
    return("#FF0000")
  }
  if (cl == "-1") {
    return("#0000FF")
  }
  "#00FF00"
}
# One colour string per label; vapply guarantees a character vector.
patientcolors <- vapply(set, color.map, character(1))
# Keep the rows whose p-value for the second design column is below 0.05.
selected <- fit$p.value[, 2] < 0.05
# A missing p-value must count as "not selected": an NA in a logical row
# index would insert an all-NA row into dfsel and break dist()/hclust().
selected[is.na(selected)] <- FALSE
if (!any(selected)) {
  stop("No feature reaches p < 0.05; nothing to cluster or plot.",
       call. = FALSE)
}
dfsel <- df[selected, ]
data <- data.matrix(dfsel)
# Hierarchical clustering of the columns of the selected data
# (dist() and hclust() defaults), then cut the tree into two groups.
hc <- hclust(dist(t(data)))
dd.col <- as.dendrogram(hc)
groups <- cutree(hc, k = 2)
# Row annotation: the feature family of every row of the feature table.
# The families come in fixed-size consecutive runs (1 + 1 + 8 * 86 = 690 rows).
feature_families <- c(
  "Age", "Sex", "BUS_Based_Basic",
  "BUS_Basd_Wavelet_A", "BUS_Basd_Wavelet_H",
  "BUS_Basd_Wavelet_V", "BUS_Basd_Wavelet_D",
  "SEUS_Based_R", "SEUS_Based_G", "SEUS_Based_B"
)
feature_family_sizes <- c(1, 1, 86, 86, 86, 86, 86, 86, 86, 86)
# Row names are set explicitly from df so that the annotation is matched to
# the heatmap rows by name; a row-count mismatch then fails loudly here
# instead of silently mislabelling features.
annotation_row <- data.frame(
  FeatureType = factor(rep(feature_families, feature_family_sizes)),
  row.names = rownames(df)
)
# Column annotation: the label-derived status and the two-way cluster
# membership of every column. factor() orders the colour strings
# alphabetically before the two labels are attached.
# Cluster is stored as a factor so that it is drawn as two discrete classes
# rather than as a continuous colour gradient.
annotation_col <- data.frame(
  Metastasis = factor(patientcolors, labels = c("negtive", "postive")),
  Cluster = factor(groups),
  row.names = names(groups)
)
# Colours for every annotation track, keyed by level name.
ann_colors <- list(
  Cluster = c("1" = "#F0FFF0", "2" = "#48D1CC"),
  Metastasis = c("postive" = "#C0C0C0", "negtive" = "#333333"),
  FeatureType = c(
    Age = "white",
    Sex = "#FFFF00",
    BUS_Based_Basic = "#6495ED",
    BUS_Basd_Wavelet_A = "#F5F5F5",
    BUS_Basd_Wavelet_H = "#DCDCDC",
    BUS_Basd_Wavelet_V = "#D3D3D3",
    BUS_Basd_Wavelet_D = "#A9A9A9",
    SEUS_Based_R = "#90EE90",
    SEUS_Based_G = "#66CDAA",
    SEUS_Based_B = "#32CD32"
  )
)
# Draw the annotated heatmap of the selected features.
# The row-distance argument is spelled out in full
# (clustering_distance_rows) rather than relying on partial argument matching.
pmap <- pheatmap(
  data,
  annotation_row = annotation_row,
  annotation_col = annotation_col,
  annotation_colors = ann_colors,
  clustering_distance_rows = "minkowski",
  color = colorRampPalette(c("#0099ff", "#F5FFFA", "#7FFFD4"))(50),
  cellwidth = 6,
  cellheight = 3,
  fontsize = 6,
  fontsize_row = 6
)
# Show the dimensions of the selected-feature table.
dim(dfsel)