1. pivot

One variable might be spread across multiple columns. --> pivot_longer()
One observation might be scattered across multiple rows. --> pivot_wider()

1.1 pivot_longer

pivot_longer(data, cols, names_to = "name", values_to = "value", values_drop_na = FALSE)
data 需要转换的数据
cols 进行转换的列, the columns to pivot are specified with dplyr::select() style notation.
names_to 列名转换为
values_to 值放置于
values_drop_na 去掉缺失值

> table4a
# A tibble: 3 x 3
  country     `1999` `2000`
* <chr>        <int>  <int>
1 Afghanistan    745   2666
2 Brazil       37737  80488
3 China       212258 213766
> table4a %>% 
+   pivot_longer(c(`1999`, `2000`), names_to = "year", values_to = "cases")
# A tibble: 6 x 3
  country     year   cases
  <chr>       <chr>  <int>
1 Afghanistan 1999     745
2 Afghanistan 2000    2666
3 Brazil      1999   37737
4 Brazil      2000   80488
5 China       1999  212258
6 China       2000  213766

# table4a中列名1999与2000都不符合命名标准（以数字开头），所以需要用反引号 ` 引用

1.2 pivot_wider

pivot_wider(data, names_from = name, values_from = value)

> table2
# A tibble: 12 x 4
   country      year type            count
   <chr>       <int> <chr>           <int>
 1 Afghanistan  1999 cases             745
 2 Afghanistan  1999 population   19987071
 3 Afghanistan  2000 cases            2666
 4 Afghanistan  2000 population   20595360
 5 Brazil       1999 cases           37737
 6 Brazil       1999 population  172006362
 7 Brazil       2000 cases           80488
 8 Brazil       2000 population  174504898
 9 China        1999 cases          212258
10 China        1999 population 1272915272
11 China        2000 cases          213766
12 China        2000 population 1280428583
> table2 %>%
+   pivot_wider(names_from = type, values_from = count)
# A tibble: 6 x 4
  country      year  cases population
  <chr>       <int>  <int>      <int>
1 Afghanistan  1999    745   19987071
2 Afghanistan  2000   2666   20595360
3 Brazil       1999  37737  172006362
4 Brazil       2000  80488  174504898
5 China        1999 212258 1272915272
6 China        2000 213766 1280428583

exercise:

> preg <- tribble(
+   ~pregnant, ~male, ~female,
+   "yes",     NA,    10,
+   "no",      20,    12
+ )
> preg
# A tibble: 2 x 3
  pregnant  male female
  <chr>    <dbl>  <dbl>
1 yes         NA     10
2 no          20     12

> preg %>% 
+   pivot_longer(cols = 2:3,
+                # cols = male:female,
+                # cols = c("male","female"),
+                names_to = "gender", 
+                values_to = "count")
# A tibble: 4 x 3
  pregnant gender count
  <chr>    <chr>  <dbl>
1 yes      male      NA
2 yes      female    10
3 no       male      20
4 no       female    12

2 separate and unite

2.1 separate

separate(data, col, into, sep=, convert, remove)
col 需要拆分的列
into = 拆分为
sep = 拆分位置（特定符号或指定位置）
convert 是否转换数据类型（默认FALSE）
remove 删除原有列（默认TRUE）

> table3
# A tibble: 6 x 3
  country      year rate             
* <chr>       <int> <chr>            
1 Afghanistan  1999 745/19987071     
2 Afghanistan  2000 2666/20595360    
3 Brazil       1999 37737/172006362  
4 Brazil       2000 80488/174504898  
5 China        1999 212258/1272915272
6 China        2000 213766/1280428583

> table3 %>% 
+   separate(rate, into = c("cases", "population"), sep = "/", convert = TRUE)
# A tibble: 6 x 4
  country      year  cases population
  <chr>       <int>  <int>      <int>
1 Afghanistan  1999    745   19987071
2 Afghanistan  2000   2666   20595360
3 Brazil       1999  37737  172006362
4 Brazil       2000  80488  174504898
5 China        1999 212258 1272915272
6 China        2000 213766 1280428583

extract() uses regular expressions to capture groups and turns each group into a new column.

2.2 separate_rows

separate_rows(data, ..., sep = "[^[:alnum:].]+", convert = FALSE)

> teacher <- data.frame(
+   teacher = c("a", "b"),
+   class = c("1,2", "1,3,5")
+ )
> teacher 
  teacher class
1       a   1,2
2       b 1,3,5
> teacher %>% separate_rows("class", sep = ",", convert = TRUE)
  teacher class
1       a     1
2       a     2
3       b     1
4       b     3
5       b     5

2.3 unite

unite(data, col, ..., sep = "_", remove = TRUE, na.rm = FALSE)

> table5
# A tibble: 6 x 4
  country     century year  rate             
* <chr>       <chr>   <chr> <chr>            
1 Afghanistan 19      99    745/19987071     
2 Afghanistan 20      00    2666/20595360    
3 Brazil      19      99    37737/172006362  
4 Brazil      20      00    80488/174504898  
5 China       19      99    212258/1272915272
6 China       20      00    213766/1280428583

> table5 %>%
+   unite(col = new,century, year, sep = "")
# A tibble: 6 x 3
  country     new   rate             
  <chr>       <chr> <chr>            
1 Afghanistan 1999  745/19987071     
2 Afghanistan 2000  2666/20595360    
3 Brazil      1999  37737/172006362  
4 Brazil      2000  80488/174504898  
5 China       1999  212258/1272915272
6 China       2000  213766/1280428583

R-tidyr 数据整理