R可视化：PCA图的可视化修改

PCA图的可视化修改

PCA是降维方法之一，对PCA结果进行可视化的方法有很多，现在总结如下几种可视化方法。更多知识分享请到 https://zouhua.top/。

数据

使用PCA的结果数据也即是points数据。

library(dplyr)
library(ggplot2)
library(tibble)
library(patchwork)
dat <- read.table("datset/pca_site.txt", header = T, sep = "\t", row.names = 1) %>% 
  mutate(PID = rep(paste("P", c(1:8)), 3))

代码

普通方式

p <- ggplot(dat, aes(x=PC1, y=PC2))+ 
  geom_point(aes(color = group1))+
  labs(x = 'PCA1: 30%', y = 'PCA2: 20%')+
  scale_color_manual(values = c('red', 'purple', 'green'))+  
  theme_bw()+
  theme(panel.grid = element_blank(),
        panel.background = element_rect(color = 'black', fill = 'transparent'),
        legend.key = element_rect(fill = 'transparent'))

加置信区间

p1 <- p + stat_ellipse(aes(color = group1), level = 0.95, linetype = 2, show.legend = FALSE)

设置阴影区

p2 <- p + stat_ellipse(aes(fill = group1), geom = 'polygon', level = 0.95, alpha = 0.1, show.legend = FALSE) +
  scale_fill_manual(values = c('red', 'purple', 'green'))

设置标记分组

#参考来源
#https://stackoverflow.com/questions/42575769/how-to-modify-the-backgroup-color-of-label-in-the-multiple-ggproto-using-ggplot2

geom_enterotype <- function(mapping = NULL, data = NULL, stat = "identity",  position = "identity", 
                            alpha = 0.3, prop = 0.5, ..., lineend = "butt", linejoin = "round", 
                            linemitre = 1, arrow = NULL, na.rm = FALSE, parse = FALSE, 
                            nudge_x = 0, nudge_y = 0, label.padding = unit(0.15, "lines"), 
                            label.r = unit(0.15, "lines"), label.size = 0.1, 
                            show.legend = TRUE, inherit.aes = TRUE) {
library(ggplot2)
# create new stat and geom for PCA scatterplot with ellipses
StatEllipse <- ggproto("StatEllipse", Stat, 
    required_aes = c("x", "y"), 
    compute_group = function(., data, scales, level = 0.75, segments = 51, ...) {
    library(MASS)
    dfn <- 2
    dfd <- length(data$x) - 1
    if (dfd < 3) {
        ellipse <- rbind(c(NA, NA))
    } else {
        v <- cov.trob(cbind(data$x, data$y))
        shape <- v$cov
        center <- v$center
        radius <- sqrt(dfn * qf(level, dfn, dfd))
        angles <- (0:segments) * 2 * pi/segments
        unit.circle <- cbind(cos(angles), sin(angles))
        ellipse <- t(center + radius * t(unit.circle %*% chol(shape)))
    }
    ellipse <- as.data.frame(ellipse)
    colnames(ellipse) <- c("x", "y")
    return(ellipse)
})

# write new ggproto 
GeomEllipse <- ggproto("GeomEllipse", Geom, 
    draw_group = function(data, panel_scales, coord) {
            n <- nrow(data)
            if (n == 1) 
            return(zeroGrob())
            munched <- coord_munch(coord, data, panel_scales)
            munched <- munched[order(munched$group), ]
            first_idx <- !duplicated(munched$group)
            first_rows <- munched[first_idx, ]
            grid::pathGrob(munched$x, munched$y, default.units = "native", 
                           id = munched$group, 
                           gp = grid::gpar(col = first_rows$colour, 
                           fill = alpha(first_rows$fill, first_rows$alpha), lwd = first_rows$size * .pt, lty = first_rows$linetype))
             }, 
             default_aes = aes(colour = "NA", fill = "grey20", size = 0.5, linetype = 1, alpha = NA, prop = 0.5), 
             handle_na = function(data, params) {
                   data
           }, 
          required_aes = c("x", "y"), 
          draw_key = draw_key_path
)

# create a new stat for PCA scatterplot with lines which totally directs to the center
StatConline <- ggproto("StatConline", Stat, 
    compute_group = function(data, scales) {
    library(miscTools)
    library(MASS)
    df <- data.frame(data$x,data$y)
    mat <- as.matrix(df)
    center <- cov.trob(df)$center
    names(center)<- NULL 
    mat_insert <- insertRow(mat, 2, center )
    for(i in 1:nrow(mat)) {
    mat_insert <- insertRow( mat_insert, 2*i, center )
    next
    }
    mat_insert <- mat_insert[-c(2:3),]
    rownames(mat_insert) <- NULL
    mat_insert <- as.data.frame(mat_insert,center)
    colnames(mat_insert) =c("x","y")
    return(mat_insert)
    },
    required_aes = c("x", "y")

)

# create a new stat for PCA scatterplot with center labels
StatLabel <- ggproto("StatLabel" ,Stat,
               compute_group = function(data, scales) {
                library(MASS)
                df <- data.frame(data$x,data$y)
                center <- cov.trob(df)$center
                names(center)<- NULL 
                center <- t(as.data.frame(center))
                center <- as.data.frame(cbind(center))
                colnames(center) <- c("x","y")
                rownames(center) <- NULL
                return(center)
                },
                required_aes = c("x", "y")
)


layer1 <- layer(data = data, mapping = mapping, stat = stat, geom = GeomPoint, 
        position = position, show.legend = show.legend, inherit.aes = inherit.aes, 
        params = list(na.rm = na.rm, ...))
layer2 <- layer(stat = StatEllipse, data = data, mapping = mapping, geom = GeomEllipse, position = position, show.legend = FALSE, 
                     inherit.aes = inherit.aes, params = list(na.rm = na.rm, prop = prop, alpha = alpha, ...))
layer3 <- layer(data = data, mapping = mapping, stat =  StatConline, geom = GeomPath, 
                                   position = position, show.legend = show.legend, inherit.aes = inherit.aes, 
                                   params = list(lineend = lineend, linejoin = linejoin, 
                                   linemitre = linemitre, arrow = arrow, na.rm = na.rm, ...))
if (!missing(nudge_x) || !missing(nudge_y)) {
    if (!missing(position)) {
      stop("Specify either `position` or `nudge_x`/`nudge_y`", 
           call. = FALSE)
    }
    position <- position_nudge(nudge_x, nudge_y)
  }
layer4 <- layer(data = data, mapping = mapping, stat = StatLabel, geom = GeomLabel, 
        position = position, show.legend = FALSE, inherit.aes = inherit.aes, 
        params = list(parse = parse, label.padding = label.padding, 
                      label.r = label.r, label.size = label.size, na.rm = na.rm, ...))
return(list(layer1,layer2,layer3,layer4))
}


p3 <- p + geom_enterotype(aes(fill = group1, color = group1, label = group1), show.legend = FALSE) +
  scale_fill_manual(values = c('#ffa6a9', '#e8b4fd', '#c7ffc4'))

设置连线：设置时间序列研究

group1_label <- cbind(PC1=tapply(dat$PC1, dat$group1, mean), PC2=tapply(dat$PC2, dat$group1, mean)) %>%
  data.frame() %>%
  rownames_to_column("group1")
group1_border <- plyr::ddply(dat, 'group1', function(x)x[chull(x[[1]], x[[2]]), ])
p4 <- p + geom_line(aes(group=PID), linetype = "dashed", alpha = 0.3) + 
  geom_text(data = group1_label, aes(x=PC1, y=PC2, label=group1, color=group1)) + 
  geom_polygon(data = group1_border, aes(fill = group1), color = "black", alpha = 0.1, show.legend = FALSE)+
  scale_color_manual(values = c('red', 'purple', 'green'))+
  guides(group=F, fill=F, color=F)+
  geom_hline(yintercept = 0, linetype = "dashed")+
  geom_vline(xintercept = 0, linetype = "dashed")

合并结果

p1 + p2 + p3 + p4 + plot_layout(ncol = 2) +
  plot_annotation(tag_levels = c(1), tag_prefix = "p")

引用

R语言绘制PCA、RDA等排序图

参考文章如引起任何侵权问题，可以与我联系，谢谢。

R可视化：PCA图的可视化修改

PCA图的可视化修改

数据

代码

合并结果

引用

你可能感兴趣的:(R可视化：PCA图的可视化修改)