R可视化:PCA图的可视化修改

PCA图的可视化修改

PCA是降维方法之一,对PCA结果进行可视化的方法有很多,现在总结如下几种可视化方法。更多知识分享请到 https://zouhua.top/

数据

使用PCA的结果数据也即是points数据。

library(dplyr)
library(ggplot2)
library(tibble)
library(patchwork)
dat <- read.table("datset/pca_site.txt", header = T, sep = "\t", row.names = 1) %>% 
  mutate(PID = rep(paste("P", c(1:8)), 3))
R可视化:PCA图的可视化修改_第1张图片

代码

  • 普通方式

    p <- ggplot(dat, aes(x=PC1, y=PC2))+ 
      geom_point(aes(color = group1))+
      labs(x = 'PCA1: 30%', y = 'PCA2: 20%')+
      scale_color_manual(values = c('red', 'purple', 'green'))+  
      theme_bw()+
      theme(panel.grid = element_blank(),
            panel.background = element_rect(color = 'black', fill = 'transparent'),
            legend.key = element_rect(fill = 'transparent'))
    
  • 加置信区间

    p1 <- p + stat_ellipse(aes(color = group1), level = 0.95, linetype = 2, show.legend = FALSE)
    
  • 设置阴影区

    p2 <- p + stat_ellipse(aes(fill = group1), geom = 'polygon', level = 0.95, alpha = 0.1, show.legend = FALSE) +
      scale_fill_manual(values = c('red', 'purple', 'green'))
    
  • 设置标记分组

    #参考来源
    #https://stackoverflow.com/questions/42575769/how-to-modify-the-backgroup-color-of-label-in-the-multiple-ggproto-using-ggplot2
    
    geom_enterotype <- function(mapping = NULL, data = NULL, stat = "identity",  position = "identity", 
                                alpha = 0.3, prop = 0.5, ..., lineend = "butt", linejoin = "round", 
                                linemitre = 1, arrow = NULL, na.rm = FALSE, parse = FALSE, 
                                nudge_x = 0, nudge_y = 0, label.padding = unit(0.15, "lines"), 
                                label.r = unit(0.15, "lines"), label.size = 0.1, 
                                show.legend = TRUE, inherit.aes = TRUE) {
    library(ggplot2)
    # create new stat and geom for PCA scatterplot with ellipses
    StatEllipse <- ggproto("StatEllipse", Stat, 
        required_aes = c("x", "y"), 
        compute_group = function(., data, scales, level = 0.75, segments = 51, ...) {
        library(MASS)
        dfn <- 2
        dfd <- length(data$x) - 1
        if (dfd < 3) {
            ellipse <- rbind(c(NA, NA))
        } else {
            v <- cov.trob(cbind(data$x, data$y))
            shape <- v$cov
            center <- v$center
            radius <- sqrt(dfn * qf(level, dfn, dfd))
            angles <- (0:segments) * 2 * pi/segments
            unit.circle <- cbind(cos(angles), sin(angles))
            ellipse <- t(center + radius * t(unit.circle %*% chol(shape)))
        }
        ellipse <- as.data.frame(ellipse)
        colnames(ellipse) <- c("x", "y")
        return(ellipse)
    })
    
    # write new ggproto 
    GeomEllipse <- ggproto("GeomEllipse", Geom, 
        draw_group = function(data, panel_scales, coord) {
                n <- nrow(data)
                if (n == 1) 
                return(zeroGrob())
                munched <- coord_munch(coord, data, panel_scales)
                munched <- munched[order(munched$group), ]
                first_idx <- !duplicated(munched$group)
                first_rows <- munched[first_idx, ]
                grid::pathGrob(munched$x, munched$y, default.units = "native", 
                               id = munched$group, 
                               gp = grid::gpar(col = first_rows$colour, 
                               fill = alpha(first_rows$fill, first_rows$alpha), lwd = first_rows$size * .pt, lty = first_rows$linetype))
                 }, 
                 default_aes = aes(colour = "NA", fill = "grey20", size = 0.5, linetype = 1, alpha = NA, prop = 0.5), 
                 handle_na = function(data, params) {
                       data
               }, 
              required_aes = c("x", "y"), 
              draw_key = draw_key_path
    )
    
    # create a new stat for PCA scatterplot with lines which totally directs to the center
    StatConline <- ggproto("StatConline", Stat, 
        compute_group = function(data, scales) {
        library(miscTools)
        library(MASS)
        df <- data.frame(data$x,data$y)
        mat <- as.matrix(df)
        center <- cov.trob(df)$center
        names(center)<- NULL 
        mat_insert <- insertRow(mat, 2, center )
        for(i in 1:nrow(mat)) {
        mat_insert <- insertRow( mat_insert, 2*i, center )
        next
        }
        mat_insert <- mat_insert[-c(2:3),]
        rownames(mat_insert) <- NULL
        mat_insert <- as.data.frame(mat_insert,center)
        colnames(mat_insert) =c("x","y")
        return(mat_insert)
        },
        required_aes = c("x", "y")
    
    )
    
    # create a new stat for PCA scatterplot with center labels
    StatLabel <- ggproto("StatLabel" ,Stat,
                   compute_group = function(data, scales) {
                    library(MASS)
                    df <- data.frame(data$x,data$y)
                    center <- cov.trob(df)$center
                    names(center)<- NULL 
                    center <- t(as.data.frame(center))
                    center <- as.data.frame(cbind(center))
                    colnames(center) <- c("x","y")
                    rownames(center) <- NULL
                    return(center)
                    },
                    required_aes = c("x", "y")
    )
    
    
    layer1 <- layer(data = data, mapping = mapping, stat = stat, geom = GeomPoint, 
            position = position, show.legend = show.legend, inherit.aes = inherit.aes, 
            params = list(na.rm = na.rm, ...))
    layer2 <- layer(stat = StatEllipse, data = data, mapping = mapping, geom = GeomEllipse, position = position, show.legend = FALSE, 
                         inherit.aes = inherit.aes, params = list(na.rm = na.rm, prop = prop, alpha = alpha, ...))
    layer3 <- layer(data = data, mapping = mapping, stat =  StatConline, geom = GeomPath, 
                                       position = position, show.legend = show.legend, inherit.aes = inherit.aes, 
                                       params = list(lineend = lineend, linejoin = linejoin, 
                                       linemitre = linemitre, arrow = arrow, na.rm = na.rm, ...))
    if (!missing(nudge_x) || !missing(nudge_y)) {
        if (!missing(position)) {
          stop("Specify either `position` or `nudge_x`/`nudge_y`", 
               call. = FALSE)
        }
        position <- position_nudge(nudge_x, nudge_y)
      }
    layer4 <- layer(data = data, mapping = mapping, stat = StatLabel, geom = GeomLabel, 
            position = position, show.legend = FALSE, inherit.aes = inherit.aes, 
            params = list(parse = parse, label.padding = label.padding, 
                          label.r = label.r, label.size = label.size, na.rm = na.rm, ...))
    return(list(layer1,layer2,layer3,layer4))
    }
    
    
    p3 <- p + geom_enterotype(aes(fill = group1, color = group1, label = group1), show.legend = FALSE) +
      scale_fill_manual(values = c('#ffa6a9', '#e8b4fd', '#c7ffc4'))
    
  • 设置连线:设置时间序列研究

    group1_label <- cbind(PC1=tapply(dat$PC1, dat$group1, mean), PC2=tapply(dat$PC2, dat$group1, mean)) %>%
      data.frame() %>%
      rownames_to_column("group1")
    group1_border <- plyr::ddply(dat, 'group1', function(x)x[chull(x[[1]], x[[2]]), ])
    p4 <- p + geom_line(aes(group=PID), linetype = "dashed", alpha = 0.3) + 
      geom_text(data = group1_label, aes(x=PC1, y=PC2, label=group1, color=group1)) + 
      geom_polygon(data = group1_border, aes(fill = group1), color = "black", alpha = 0.1, show.legend = FALSE)+
      scale_color_manual(values = c('red', 'purple', 'green'))+
      guides(group=F, fill=F, color=F)+
      geom_hline(yintercept = 0, linetype = "dashed")+
      geom_vline(xintercept = 0, linetype = "dashed")
    

合并结果

p1 + p2 + p3 + p4 + plot_layout(ncol = 2) +
  plot_annotation(tag_levels = c(1), tag_prefix = "p")
R可视化:PCA图的可视化修改_第2张图片

引用

  1. R语言绘制PCA、RDA等排序图

参考文章如引起任何侵权问题,可以与我联系,谢谢。

你可能感兴趣的:(R可视化:PCA图的可视化修改)