R语言可视化(十):集合图绘制

10.UpSet集合图绘制


清除当前环境中的变量

# Remove every object from the current environment before starting.
rm(list = ls())

设置工作目录

# Switch the working directory to the author's project folder
# (machine-specific path; adjust it before running on another machine).
setwd(dir = "C:/Users/Dell/Desktop/R_Plots/10upset/")

使用UpSetR包绘制集合图

library(UpSetR)
# Load the movies data set bundled with the UpSetR package.
# The file is read as semicolon-separated with a header row.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
movies <- read.csv(
  system.file("extdata", "movies.csv", package = "UpSetR"),
  header = TRUE,
  sep = ";"
)
dim(movies)
## [1] 3883   21

head(movies)
##                                 Name ReleaseDate Action Adventure Children
## 1                   Toy Story (1995)        1995      0         0        1
## 2                     Jumanji (1995)        1995      0         1        1
## 3            Grumpier Old Men (1995)        1995      0         0        0
## 4           Waiting to Exhale (1995)        1995      0         0        0
## 5 Father of the Bride Part II (1995)        1995      0         0        0
## 6                        Heat (1995)        1995      1         0        0
##   Comedy Crime Documentary Drama Fantasy Noir Horror Musical Mystery
## 1      1     0           0     0       0    0      0       0       0
## 2      0     0           0     0       1    0      0       0       0
## 3      1     0           0     0       0    0      0       0       0
## 4      1     0           0     1       0    0      0       0       0
## 5      1     0           0     0       0    0      0       0       0
## 6      0     1           0     0       0    0      0       0       0
##   Romance SciFi Thriller War Western AvgRating Watches
## 1       0     0        0   0       0      4.15    2077
## 2       0     0        0   0       0      3.20     701
## 3       1     0        0   0       0      3.02     478
## 4       0     0        0   0       0      2.73     170
## 5       0     0        0   0       0      3.01     296
## 6       0     0        1   0       0      3.88     940

基础绘图

# Basic UpSet plot for nine genre sets of the movies data.
upset(
  movies,
  # Sets to include in the plot
  sets = c(
    "Action", "Adventure", "Comedy", "Drama", "Mystery",
    "Thriller", "Romance", "War", "Western"
  ),
  # Order the intersections by frequency
  order.by = "freq",
  # y-axis label of the intersection bar plot
  mainbar.y.label = "Genre Intersections",
  # x-axis label of the set-size bar plot
  sets.x.label = "Movies Per Genre",
  # Angle of the numbers shown above the intersection bars
  number.angles = 30,
  # Size of the circles in the matrix
  point.size = 3.5,
  # Size of the lines connecting the circles in the matrix
  line.size = 2,
  # Height ratio between the bar plot and the matrix plot
  mb.ratio = c(0.6, 0.4)
)
(图:上方代码绘制的UpSet集合图)
# Same nine genre sets, with a different ordering and layout.
upset(
  movies,
  # Sets to include in the plot
  sets = c(
    "Action", "Adventure", "Comedy", "Drama", "Mystery",
    "Thriller", "Romance", "War", "Western"
  ),
  # Change the ordering method: order by degree
  order.by = "degree",
  # Keep the sets in the order in which they were passed in
  keep.order = TRUE,
  # y-axis label of the intersection bar plot
  mainbar.y.label = "Genre Intersections",
  # x-axis label of the set-size bar plot
  sets.x.label = "Movies Per Genre",
  # Angle of the numbers shown above the intersection bars
  number.angles = 45,
  # Size of the circles in the matrix
  point.size = 3,
  # Size of the lines connecting the circles in the matrix
  line.size = 1.5,
  # Height ratio between the bar plot and the matrix plot
  mb.ratio = c(0.7, 0.3)
)
(图:上方代码绘制的UpSet集合图)
# Set data in list format, to be passed through fromList():
# a named list holding one vector of elements per set.
listInput <- setNames(
  list(
    c(1, 2, 3, 5, 7, 8, 11, 12, 13),
    c(1, 2, 4, 5, 10),
    c(1, 5, 6, 7, 8, 9, 10, 12, 13)
  ),
  c("one", "two", "three")
)
listInput
## $one
## [1]  1  2  3  5  7  8 11 12 13
## 
## $two
## [1]  1  2  4  5 10
## 
## $three
## [1]  1  5  6  7  8  9 10 12 13

# Convert the list with fromList() and plot it, ordering intersections by frequency.
upset(data = fromList(input = listInput), order.by = "freq")
(图:上方代码绘制的UpSet集合图)
# Set data in expression-vector format, to be passed through fromExpression():
# a named numeric vector whose names are set names, joined by "&" for
# intersections, and whose values are the corresponding counts.
expressionInput <- c(2, 1, 2, 1, 4, 1, 2)
names(expressionInput) <- c(
  "one", "two", "three",
  "one&two", "one&three",
  "two&three", "one&two&three"
)
expressionInput
##           one           two         three       one&two     one&three 
##             2             1             2             1             4 
##     two&three one&two&three 
##             1             2

# Convert the expression vector with fromExpression() and plot it,
# ordering intersections by frequency.
upset(
  data = fromExpression(expressionInput),
  order.by = "freq",
  point.size = 2,
  line.size = 1
)
(图:上方代码绘制的UpSet集合图)

使用set.metadata参数添加元数据信息

# Build the set metadata: one row per set column of `movies` (columns 3 to 19),
# each paired with a randomly generated score between 0 and 90.
# No seed is set in this snippet, so the values vary between runs and the
# output recorded below is only an example.
sets <- names(movies[3:19])
avgRottenTomatoesScore <- round(runif(length(sets), min = 0, max = 90))
# Build the data frame directly so the score column stays numeric.
# cbind() on a character and a numeric vector yields a character matrix,
# which would then need an as.numeric(as.character()) round trip.
metadata <- data.frame(
  sets = sets,
  avgRottenTomatoesScore = avgRottenTomatoesScore
)
head(metadata)
##          sets avgRottenTomatoesScore
## 1      Action                     73
## 2   Adventure                     76
## 3    Children                     30
## 4      Comedy                      5
## 5       Crime                     44
## 6 Documentary                      8


添加元数据条形图

# Add a bar plot of the `avgRottenTomatoesScore` metadata column to the UpSet plot.
upset(
  movies,
  sets = c(
    "Action", "Adventure", "Comedy", "Drama", "Mystery",
    "Thriller", "Romance", "War", "Western"
  ),
  set.metadata = list(
    data = metadata,
    plots = list(
      list(
        type = "hist",
        column = "avgRottenTomatoesScore",
        assign = 20
      )
    )
  )
)
(图:上方代码绘制的UpSet集合图)

添加元数据热图

# Draw one city at random (with replacement) for every metadata row.
# No seed is set in this snippet, so the draw varies between runs and the
# output recorded below is only an example.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
Cities <- sample(c("Boston", "NYC", "LA"), nrow(metadata), replace = TRUE)
# Store the cities as a character column named `Cities`.
metadata$Cities <- as.character(Cities)
# Show the metadata rows of the five sets used in the plots that follow.
# %in% never returns NA, so the logical vector can index the rows directly.
metadata[metadata$sets %in% c("Drama", "Comedy", "Action", "Thriller", "Romance"), ]
##        sets avgRottenTomatoesScore Cities
## 1    Action                     73     LA
## 4    Comedy                      5     LA
## 7     Drama                     55    NYC
## 13  Romance                     43 Boston
## 15 Thriller                     51     LA

head(metadata)
##          sets avgRottenTomatoesScore Cities
## 1      Action                     73     LA
## 2   Adventure                     76     LA
## 3    Children                     30    NYC
## 4      Comedy                      5     LA
## 5       Crime                     44    NYC
## 6 Documentary                      8     LA

# Add a heat map of the `Cities` metadata column, with one color per city.
upset(
  movies,
  sets = c("Drama", "Comedy", "Action", "Thriller", "Romance"),
  set.metadata = list(
    data = metadata,
    plots = list(
      list(
        type = "heat",
        column = "Cities",
        assign = 10,
        colors = c(Boston = "green", NYC = "navy", LA = "purple")
      )
    )
  )
)
(图:上方代码绘制的UpSet集合图)
# Add two metadata heat maps: one for `Cities` with explicit colors and one
# for `avgRottenTomatoesScore` without a `colors` entry.
upset(
  movies,
  sets = c("Drama", "Comedy", "Action", "Thriller", "Romance"),
  set.metadata = list(
    data = metadata,
    plots = list(
      list(
        type = "heat",
        column = "Cities",
        assign = 10,
        colors = c(Boston = "green", NYC = "navy", LA = "purple")
      ),
      list(
        type = "heat",
        column = "avgRottenTomatoesScore",
        assign = 10
      )
    )
  )
)
(图:上方代码绘制的UpSet集合图)

添加元数据文本

# Show the `Cities` metadata column as text, with one color per city.
upset(
  movies,
  sets = c("Drama", "Comedy", "Action", "Thriller", "Romance"),
  set.metadata = list(
    data = metadata,
    plots = list(
      list(
        type = "text",
        column = "Cities",
        assign = 10,
        colors = c(Boston = "green", NYC = "navy", LA = "purple")
      )
    )
  )
)
(图:上方代码绘制的UpSet集合图)

添加元数据矩阵条形图

# Combine a metadata bar plot of `avgRottenTomatoesScore` with a
# "matrix_rows" entry driven by `Cities` (one color per city, alpha 0.5).
upset(
  movies,
  sets = c("Drama", "Comedy", "Action", "Thriller", "Romance"),
  set.metadata = list(
    data = metadata,
    plots = list(
      list(
        type = "hist",
        column = "avgRottenTomatoesScore",
        assign = 20
      ),
      list(
        type = "matrix_rows",
        column = "Cities",
        colors = c(Boston = "green", NYC = "navy", LA = "purple"),
        alpha = 0.5
      )
    )
  )
)
(图:上方代码绘制的UpSet集合图)

使用queries参数查询数据

# Preview the first six rows of the movies data.
head(movies, n = 6L)
##                                 Name ReleaseDate Action Adventure Children
## 1                   Toy Story (1995)        1995      0         0        1
## 2                     Jumanji (1995)        1995      0         1        1
## 3            Grumpier Old Men (1995)        1995      0         0        0
## 4           Waiting to Exhale (1995)        1995      0         0        0
## 5 Father of the Bride Part II (1995)        1995      0         0        0
## 6                        Heat (1995)        1995      1         0        0
##   Comedy Crime Documentary Drama Fantasy Noir Horror Musical Mystery
## 1      1     0           0     0       0    0      0       0       0
## 2      0     0           0     0       1    0      0       0       0
## 3      1     0           0     0       0    0      0       0       0
## 4      1     0           0     1       0    0      0       0       0
## 5      1     0           0     0       0    0      0       0       0
## 6      0     1           0     0       0    0      0       0       0
##   Romance SciFi Thriller War Western AvgRating Watches
## 1       0     0        0   0       0      4.15    2077
## 2       0     0        0   0       0      3.20     701
## 3       1     0        0   0       0      3.02     478
## 4       0     0        0   0       0      2.73     170
## 5       0     0        0   0       0      3.01     296
## 6       0     0        1   0       0      3.88     940

# Use the built-in `intersects` query to find or display the elements of
# specific intersections. Each inner list is one query: `params` names the
# sets of the intersection, `color` sets its color, and `active` selects how
# the query is drawn on the intersection bars.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
upset(
  movies,
  queries = list(
    list(
      query = intersects,
      params = list("Drama", "Comedy", "Action"),
      color = "orange",
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Drama"),
      color = "red",
      active = FALSE
    ),
    # No color is given for this query.
    list(
      query = intersects,
      params = list("Action", "Drama"),
      active = TRUE
    )
  )
)
(图:上方代码绘制的UpSet集合图)
# Use the built-in `elements` query to visualize how certain elements are
# distributed among the intersections. `params` starts with the attribute
# column name, followed by the values passed to the query.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
upset(
  movies,
  queries = list(
    list(
      query = elements,
      params = list("AvgRating", 3.5, 4.1),
      color = "blue",
      active = TRUE
    ),
    list(
      query = elements,
      params = list("ReleaseDate", 1980, 1990, 2000),
      color = "red",
      active = FALSE
    )
  )
)
(图:上方代码绘制的UpSet集合图)
# Add a query legend at the top of the plot. `query.name` supplies the name
# of a query; the "Drama" query below is given no name.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
upset(
  movies,
  query.legend = "top",
  queries = list(
    list(
      query = intersects,
      params = list("Drama", "Comedy", "Action"),
      color = "orange",
      active = TRUE,
      query.name = "Funny action"
    ),
    list(
      query = intersects,
      params = list("Drama"),
      color = "red",
      active = FALSE
    ),
    list(
      query = intersects,
      params = list("Action", "Drama"),
      active = TRUE,
      query.name = "Emotional action"
    )
  )
)
(图:上方代码绘制的UpSet集合图)

使用attribute.plots参数添加属性图

# Preview the first six rows of the movies data.
head(movies, n = 6L)
##                                 Name ReleaseDate Action Adventure Children
## 1                   Toy Story (1995)        1995      0         0        1
## 2                     Jumanji (1995)        1995      0         1        1
## 3            Grumpier Old Men (1995)        1995      0         0        0
## 4           Waiting to Exhale (1995)        1995      0         0        0
## 5 Father of the Bride Part II (1995)        1995      0         0        0
## 6                        Heat (1995)        1995      1         0        0
##   Comedy Crime Documentary Drama Fantasy Noir Horror Musical Mystery
## 1      1     0           0     0       0    0      0       0       0
## 2      0     0           0     0       1    0      0       0       0
## 3      1     0           0     0       0    0      0       0       0
## 4      1     0           0     1       0    0      0       0       0
## 5      1     0           0     0       0    0      0       0       0
## 6      0     1           0     0       0    0      0       0       0
##   Romance SciFi Thriller War Western AvgRating Watches
## 1       0     0        0   0       0      4.15    2077
## 2       0     0        0   0       0      3.20     701
## 3       1     0        0   0       0      3.02     478
## 4       0     0        0   0       0      2.73     170
## 5       0     0        0   0       0      3.01     296
## 6       0     0        1   0       0      3.88     940

添加内置属性直方图

# Add two built-in attribute histograms, arranged in two columns:
# `ReleaseDate` with `queries = FALSE` and `AvgRating` with `queries = TRUE`.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
upset(
  movies,
  main.bar.color = "black",
  queries = list(
    list(query = intersects, params = list("Drama"), active = TRUE)
  ),
  attribute.plots = list(
    gridrows = 50,
    plots = list(
      list(plot = histogram, x = "ReleaseDate", queries = FALSE),
      list(plot = histogram, x = "AvgRating", queries = TRUE)
    ),
    ncols = 2
  )
)

(图:上方代码绘制的UpSet集合图)

添加内置属性散点图

# Add two built-in attribute scatter plots, arranged in two columns, together
# with three intersection queries and a query legend at the bottom.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
upset(
  movies,
  main.bar.color = "black",
  queries = list(
    list(
      query = intersects,
      params = list("Drama"),
      color = "red",
      active = FALSE
    ),
    # No color is given for this query.
    list(
      query = intersects,
      params = list("Action", "Drama"),
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Drama", "Comedy", "Action"),
      color = "orange",
      active = TRUE
    )
  ),
  attribute.plots = list(
    gridrows = 45,
    plots = list(
      list(
        plot = scatter_plot,
        x = "ReleaseDate",
        y = "AvgRating",
        queries = TRUE
      ),
      list(
        plot = scatter_plot,
        x = "AvgRating",
        y = "Watches",
        queries = FALSE
      )
    ),
    ncols = 2
  ),
  query.legend = "bottom"
)
(图:上方代码绘制的UpSet集合图)

添加属性箱线图

# Add box plot summaries for the `AvgRating` and `ReleaseDate` attributes.
upset(
  movies,
  boxplot.summary = c("AvgRating", "ReleaseDate")
)
(图:上方代码绘制的UpSet集合图)

一次性添加元数据,查询和属性图

# Combine set metadata, queries and attribute plots in a single UpSet plot.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
upset(
  movies,
  # Set metadata: a score bar plot, city text, and a "matrix_rows" entry
  # driven by city.
  set.metadata = list(
    data = metadata,
    plots = list(
      list(
        type = "hist",
        column = "avgRottenTomatoesScore",
        assign = 20
      ),
      list(
        type = "text",
        column = "Cities",
        assign = 5,
        colors = c(Boston = "green", NYC = "navy", LA = "purple")
      ),
      list(
        type = "matrix_rows",
        column = "Cities",
        colors = c(Boston = "green", NYC = "navy", LA = "purple"),
        alpha = 0.5
      )
    )
  ),
  # Intersection queries; the second one is given no color.
  queries = list(
    list(
      query = intersects,
      params = list("Drama"),
      color = "red",
      active = FALSE
    ),
    list(
      query = intersects,
      params = list("Action", "Drama"),
      active = TRUE
    ),
    list(
      query = intersects,
      params = list("Drama", "Comedy", "Action"),
      color = "orange",
      active = TRUE
    )
  ),
  # Attribute plots: two scatter plots arranged in two columns.
  attribute.plots = list(
    gridrows = 45,
    plots = list(
      list(
        plot = scatter_plot,
        x = "ReleaseDate",
        y = "AvgRating",
        queries = TRUE
      ),
      list(
        plot = scatter_plot,
        x = "AvgRating",
        y = "Watches",
        queries = FALSE
      )
    ),
    ncols = 2
  ),
  query.legend = "bottom"
)
(图:上方代码绘制的UpSet集合图)
# Report the R version, platform and package versions used for this session.
utils::sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Chinese (Simplified)_China.936 
## [2] LC_CTYPE=Chinese (Simplified)_China.936   
## [3] LC_MONETARY=Chinese (Simplified)_China.936
## [4] LC_NUMERIC=C                              
## [5] LC_TIME=Chinese (Simplified)_China.936    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] UpSetR_1.4.0
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.5       knitr_1.23       magrittr_1.5     tidyselect_0.2.5
##  [5] munsell_0.5.0    colorspace_1.4-1 R6_2.4.0         rlang_0.4.7     
##  [9] plyr_1.8.4       stringr_1.4.0    dplyr_0.8.3      tools_3.6.0     
## [13] grid_3.6.0       gtable_0.3.0     xfun_0.8         withr_2.1.2     
## [17] htmltools_0.3.6  assertthat_0.2.1 yaml_2.2.0       lazyeval_0.2.2  
## [21] digest_0.6.20    tibble_2.1.3     crayon_1.3.4     gridExtra_2.3   
## [25] purrr_0.3.2      ggplot2_3.2.0    glue_1.3.1       evaluate_0.14   
## [29] rmarkdown_1.13   labeling_0.3     stringi_1.4.3    compiler_3.6.0  
## [33] pillar_1.4.2     scales_1.0.0     pkgconfig_2.0.2

你可能感兴趣的:(R语言可视化(十):集合图绘制)