dplyr之参数传递

1. 概述

在shiny中经常需要根据input来向dplyr的函数提供参数,以便进行相应数据操作。dplyr在filter()、mutate()、summarise()、arrange()、select()、group_by()这些函数中,都支持Non-standard evaluation(NSE),很方便的进行参数传递。

1.1 三种方式

## 加载dplyr包
require("dplyr")
## 通过下列查看相关资料
vignette("nse")

通过select_、group_by_、filter_、summarise_、mutate_、arrange_与.dots结合来操作数据。支持下列三种方式,向dplyr传递参数。

  • With a formula: ~ mean(mpg)

  • With quote(): quote(mean(mpg))

  • As a string: "mean(mpg)"

下面是个简单的栗子

## 使用mtcars作为测试数据
data <- mtcars

myfun1 <- function(data, group_name, measure_input){
  data %>%
    group_by_(group_name) %>%
    summarise_(measure_input) %>% 
    ungroup() -> data
  return(data)
}
group_name <- ~ cyl
measure_input <- ~mean(mpg)
group_name <- quote(cyl)
measure_input <- quote(mean(mpg))
group_name <- "cyl"
measure_input <- "mean(mpg)"
myfun1(data = mtcars, group_name = group_name, measure_input = measure_input)

1.2 传递多个参数

通过.dots来传递多个参数

myfun1 <- function(data, group_name, measure_input){
  data %>%
    group_by_(.dots = group_name) %>%
    summarise_(.dots = measure_input) %>% 
    ungroup() -> data
  return(data)
}
group_name <- "cyl;carb"
group_name <- strsplit(group_name,";")[[1]]
measure_input <- "sum(disp);mean(mpg)"
measure_input <- strsplit(measure_input,";")[[1]]

group_name <- c("cyl","carb")
measure_input <- c("sum(disp)","mean(mpg)")
myfun1(data = data, group_name = group_name, measure_input = measure_input)

1.3 设置变量名

group_name <- c("cyl","carb")
measure_input <- c("sum(disp)","mean(mpg)")

measure_input <- as.list(measure_input)
measure_input <- setNames(measure_input, c("sum", "mean"))
myfun1(data = data, group_name = group_name, measure_input = measure_input)

1.4 将更多信息参数化

myfun1 <- function(data, filter_input){
  data %>%
    filter_(.dots = filter_input) -> data
  return(data)
}
## 多个筛选条件
filter_input <- c("cyl>4","vs==0","carb %in% c(3,4)")
myfun1(data = data, filter_input = filter_input)
filter_vars <- c("cyl","vs","carb")
filter_meausre <- c(">","==","in")
filter_value <- c("4","0","c(3,4)")
filter_meausre <- sub(pattern="in", replacement="%in%", filter_meausre)

filter_input <- paste(filter_vars, filter_meausre, filter_value)
myfun1(data = data, filter_input = filter_input)

2. examples

2.1 select_

data <- mtcars
select_name <- c("mpg","disp","cyl")
mtcars %>% select(mpg,disp,cyl) %>% head()
mtcars %>% select(one_of(select_name)) %>% head()

mtcars %>% select_(.dots = select_name) %>% head()
mtcars %>% select_(as.name(select_name[1]),as.name(select_name[2]),as.name(select_name[3])) %>% head()

myfun2 <- function(data, select_name){
  data %>%
    select_(.dots = select_name) -> data
  return(data)
}
select_name <- c("mpg","disp","cyl","carb")
myfun2(data = data, select_name = select_name)

2.2 filter_

myfun2 <- function(data, select_name, filter_input){
  data %>%
    filter_(.dots = filter_input) %>%
    select_(.dots = select_name) -> data
  return(data)
}
select_name <- c("mpg","disp","cyl","carb")

filter_input <- "cyl>4;vs==0;carb %in% c(3,4)"
filter_input <- as.list(strsplit(filter_input,";")[[1]])

filter_input <- c("cyl>4","vs==0","carb %in% c(3,4)")

myfun2(data = mtcars, select_name = select_name, 
       filter_input = filter_input)

2.3 group_by_和summarise_

myfun2 <- function(data, select_name = NULL, 
                   filter_input = NULL, 
                   group_name = NULL, 
                   measure_input = NULL, 
                   measure_rename = NULL){
  if(is.null(select_name)){
    select_name <- names(data)
  }
  if(!is.null(measure_rename)){
    measure_input <- as.list(measure_input) %>% 
      setNames(measure_rename)
  }
  data %>%
    filter_(.dots = filter_input) %>%
    select_(.dots = select_name) %>%
    group_by_(.dots = group_name) %>%
    summarise_(.dots = measure_input) %>%
    ungroup() %>%
    as.data.frame() -> data
  return(data)
}


select_name <- c("mpg","disp","cyl","carb")
filter_input <- c("cyl>4","vs==0","carb %in% c(3,4)")
group_name <- c("cyl","carb")
measure_input <- "sum(disp);mean(mpg)"
measure_input <- as.list(strsplit(measure_input,";")[[1]])
measure_input <- c("sum(disp)","mean(mpg)")
measure_rename <- c("disp_sum","mpg_mean")

myfun2(data=mtcars, select_name = select_name, 
       filter_input = filter_input, group_name = group_name, 
       measure_input = measure_input, measure_rename = measure_rename)

2.4 mutete_

myfun2 <- function(data, mutate_input = NULL,
                   mutate_name = NULL,
                   select_name = NULL, 
                   filter_input = NULL, 
                   group_name = NULL, 
                   measure_input = NULL, 
                   measure_rename = NULL){
  if(is.null(select_name)){
    select_name <- names(data)
  }
  if(!is.null(measure_rename)){
    measure_input <- as.list(measure_input) %>% 
      setNames(measure_rename)
  }
  if(!is.null(mutate_name)){
    mutate_input <- as.list(mutate_input) %>% 
        setNames(mutate_name)
  }
  data %>%
    mutate_(.dots = mutate_input) %>%
    filter_(.dots = filter_input) %>%
    select_(.dots = select_name) %>%
    group_by_(.dots = group_name) %>%
    summarise_(.dots = measure_input) %>%
    ungroup() %>%
    as.data.frame() -> data
  return(data)
}
mutate_input <- c("cyl*carb","disp+mpg")
mutate_name <- c("cc","dm")
select_name <- c("mpg","disp","cyl","carb","cc")
myfun2(data=mtcars, 
       mutate_input = mutate_input, mutate_name = mutate_name, 
       select_name = select_name, 
       filter_input = filter_input, group_name = group_name, 
       measure_input = measure_input, measure_rename = measure_rename)

3. lazyeval包

lazyeval包提供了一种NSE的使用方法,让我们在其他地方也可以使用NSE方法进行数据操作。

4. 参考

  • Advanced R: Non-standard evaluation

  • vignette("nse")

你可能感兴趣的:(r)