dplyr学习笔记

什么是dplyr?

dplyr是一个功能强大的R包,用于对表格数据(数据框)的行和列进行转换与汇总。主要功能:
select():选择符合条件的列;
filter():过滤行;
arrange():对行进行重排序;
mutate():创建新的列;
summarise():概括表的信息,如最值,均值等;

group_by():分组函数,采用“split-apply-combine”的概念。

安装并加载dplyr(首次使用需先运行 install.packages("dplyr") 进行安装,library() 只负责加载)

library(dplyr)
#数据集mtcars
head(mtcars)
                   mpg cyl disp  hp drat    wt  qsec vs am gear carb
Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

select():选择符合条件的列

head(select(mtcars, mpg, carb))
                   mpg carb
Mazda RX4         21.0    4
Mazda RX4 Wag     21.0    4
Datsun 710        22.8    1
Hornet 4 Drive    21.4    1
Hornet Sportabout 18.7    2
Valiant           18.1    1
#用-除去不要的列
head(select(mtcars, -mpg))
                  cyl disp  hp drat    wt  qsec vs am gear carb
Mazda RX4           6  160 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag       6  160 110 3.90 2.875 17.02  0  1    4    4
Datsun 710          4  108  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive      6  258 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout   8  360 175 3.15 3.440 17.02  0  0    3    2
Valiant             6  225 105 2.76 3.460 20.22  1  0    3    1
#用:选择连续范围的列,从mpg到wt(包含首尾两列)
head(select(mtcars, mpg:wt))
                   mpg cyl disp  hp drat    wt
Mazda RX4         21.0   6  160 110 3.90 2.620
Mazda RX4 Wag     21.0   6  160 110 3.90 2.875
Datsun 710        22.8   4  108  93 3.85 2.320
Hornet 4 Drive    21.4   6  258 110 3.08 3.215
Hornet Sportabout 18.7   8  360 175 3.15 3.440
Valiant           18.1   6  225 105 2.76 3.460

filter():过滤行

filter(mtcars, qsec >= 20)
   mpg cyl  disp  hp drat    wt  qsec vs am gear carb
1 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
2 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
3 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
4 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
filter(mtcars, qsec >= 20, carb == 2)
   mpg cyl  disp hp drat   wt qsec vs am gear carb
1 24.4   4 146.7 62 3.69 3.19 20.0  1  0    4    2
2 22.8   4 140.8 95 3.92 3.15 22.9  1  0    4    2
filter(mtcars, disp %in% c(108, 225))
   mpg cyl disp  hp drat   wt  qsec vs am gear carb
1 22.8   4  108  93 3.85 2.32 18.61  1  1    4    1
2 18.1   6  225 105 2.76 3.46 20.22  1  0    3    1

%>%管道操作:把左边表达式的结果作为右边函数的第一个参数传入,从而将多步操作串联起来

mtcars %>% select(mpg, hp) %>% head
                   mpg  hp
Mazda RX4         21.0 110
Mazda RX4 Wag     21.0 110
Datsun 710        22.8  93
Hornet 4 Drive    21.4 110
Hornet Sportabout 18.7 175
Valiant           18.1 105

arrange():对行进行重排序

mtcars %>% arrange(mpg) %>% head
   mpg cyl disp  hp drat    wt  qsec vs am gear carb
1 10.4   8  472 205 2.93 5.250 17.98  0  0    3    4
2 10.4   8  460 215 3.00 5.424 17.82  0  0    3    4
3 13.3   8  350 245 3.73 3.840 15.41  0  0    3    4
4 14.3   8  360 245 3.21 3.570 15.84  0  0    3    4
5 14.7   8  440 230 3.23 5.345 17.42  0  0    3    4
6 15.0   8  301 335 3.54 3.570 14.60  0  1    5    8
mtcars %>% select(mpg, wt, carb) %>% arrange(mpg) %>% head
   mpg    wt carb
1 10.4 5.250    4
2 10.4 5.424    4
3 13.3 3.840    4
4 14.3 3.570    4
5 14.7 5.345    4
6 15.0 3.570    8
mtcars %>% select(mpg, wt, carb) %>% arrange(mpg) %>% filter(wt >= 5)
   mpg    wt carb
1 10.4 5.250    4
2 10.4 5.424    4
3 14.7 5.345    4
mutate():创建新的列

mtcars %>% mutate(mpg_carb = mpg / carb) %>% head
   mpg cyl disp  hp drat    wt  qsec vs am gear carb mpg_carb
1 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4     5.25
2 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4     5.25
3 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1    22.80
4 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1    21.40
5 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2     9.35
6 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1    18.10
summarise():概括表的信息

mtcars %>% summarise(avg_mpg = mean(mpg), min_hp = min(hp), total = n())
   avg_mpg min_hp total
1 20.09062     52    32
group_by():分组函数

mtcars %>% group_by(carb) %>%summarise(avg_mpg = mean(mpg), min_hp = min(hp), total = n())
Source: local data frame [6 x 4]

  carb  avg_mpg min_hp total
1    1 25.34286     65     7
2    2 22.40000     52    10
3    3 16.30000    180     3
4    4 15.79000    110    10
5    6 19.70000    175     1
6    8 15.00000    335     1


参考资料:

Introduction to dplyr



你可能感兴趣的:(r,dplyr)