# Topics covered: filtering, sorting, binning, splitting, grouping, merging,
# concatenating, transposing, indexing and reshaping.

# Filtering ----
# 1. Use which() to keep only the rows that satisfy a condition.
# which() returns the positions of the TRUE values, so rows where the
# condition is NA are dropped instead of coming back as all-NA rows.
# NOTE(review): wage2_1 is not created anywhere in the visible part of this
# script; it has to exist in the session already -- confirm where it is built.
wage2_3 <- wage2_1[which(wage2_1$female == 1), ]
wage2_3
wage2_4 <- wage2_1[which(wage2_1$female == 0), ]
wage2_4
# 2. Use subset() to keep only the rows that satisfy a condition.
# `select = wage:married` keeps the contiguous run of columns from `wage`
# through `married`.
# `female` is compared against the numbers 0 and 1, the same way the which()
# examples on wage2_1 do it; string literals must use straight quotes in R.
wage2_5_male <- subset(wage2_1, female == 0, select = wage:married)
wage2_5_male
wage2_6_female <- subset(wage2_1, female == 1, select = wage:married)
wage2_6_female
library(dplyr)
# 3. The same row filter written with dplyr.
# The condition is tested row by row, so grouping by `female` beforehand would
# not change which rows are kept; it is left out so that the result is not
# returned as a grouped data frame.
wage2_7_male <- wage2_1 %>%
  filter(female == 0)
library(haven)
# Read the WAGE1 Stata file and collect the six variables used by the rest of
# the script into a plain data frame.
# The path is written with forward slashes: inside an R string a backslash
# starts an escape sequence, and "\U" is parsed as a Unicode escape.
# NOTE(review): the folder name "鏌ヨ" looks mis-encoded (mojibake); confirm
# the real directory name on disk.
WAGE1_DTA <- read_dta("C:/Users/13886/Desktop/鏌ヨ/R/WAGE1.DTA.dta")
# View() opens a data viewer, so it is only called in an interactive session.
if (interactive()) {
  View(WAGE1_DTA)
}
# NOTE(review): the column name `tenture` looks like a typo for `tenure`; it
# is kept unchanged in case other code refers to it.
wage2 <- data.frame(
  wage = WAGE1_DTA$wage,
  educ = WAGE1_DTA$educ,
  exper = WAGE1_DTA$exper,
  tenture = WAGE1_DTA$tenure,
  female = WAGE1_DTA$female,
  married = WAGE1_DTA$married
)
wage2
library(dplyr)
# Split the data by sex.
# Base split() returns a list holding one data frame per value of `female`.
# It is used here rather than tidyr's separate(): separate() splits the text
# of one column into several columns, and tidyr is not loaded by this script.
split(wage2, wage2$female)

# The same split written out with dplyr, one filter per value. The condition
# is tested row by row, so no group_by() is needed and the results are not
# left grouped.
wage2_male <- wage2 %>%
  filter(female == 0)
wage2_female <- wage2 %>%
  filter(female == 1)
wage2_male
wage2_female
# Natural log of every column of wage2. Any value of 0 becomes -Inf; this
# affects e.g. `female`, which is compared against 0 elsewhere in the script.
wage3 <- log(wage2)
wage3
# Row-wise concatenation: wage3 is stacked underneath wage2 (rbind() needs the
# column names of both data frames to match, so wage3 keeps its names).
wage4 <- rbind(wage2, wage3)
wage4
# Column-wise concatenation. The logged columns get a "log_" prefix so the
# result does not contain every column name twice, which would make
# name-based access such as wage5$wage ambiguous.
wage5 <- cbind(wage2, setNames(wage3, paste0("log_", names(wage3))))
wage5
# Range filters on `exper`. The conditions are tested row by row, so the data
# is not grouped first; ungroup() is a no-op on a plain data frame and only
# clears any grouping that wage2_male may still carry.

# Rows of wage2_male with exper strictly between 20 and 40.
wage2_male_1 <- wage2_male %>%
  ungroup() %>%
  filter(exper > 20, exper < 40)
wage2_male_1
# Rows of wage2 with exper above 20.
a <- wage2 %>%
  filter(exper > 20)
a
# Same selection as wage2_male_1, with the two conditions in the other order.
b <- wage2_male %>%
  ungroup() %>%
  filter(exper < 40, exper > 20)
b
library(dplyr)
# Build a reproducible 12-row score table: ID 1..12, Class cycling 1, 2, 3 and
# three score columns drawn uniformly from [0, 100].
# NOTE(review): ?options documents 1...22 as the valid range for `digits`.
# Every column is truncated to integer below, so this setting is presumably
# unnecessary -- confirm before removing it.
options(digits = 0)
set.seed(1)
df <- data.frame(
  ID = 1:12,
  Class = rep(c(1, 2, 3), 4),
  Chinese = runif(12, min = 0, max = 100),
  Math = runif(12, min = 0, max = 100),
  English = runif(12, min = 0, max = 100)
)
# Truncate every column to integer in one step. Assigning into df[] replaces
# the columns while keeping df a data frame.
df[] <- lapply(df, as.integer)
# Lookup and modification ----
df
df[2, ]                              # second row
df[, 4]                              # fourth column, returned as a vector
df$Chinese                           # a column by name
df[3]                                # third column, kept as a data frame
df[which(df$ID == 4), ]              # rows whose ID equals 4
df[3, 4]                             # single cell: row 3, column 4
df[3, "Math"]                        # same cell, column given by name
df[which(df$Chinese == 57), "Math"]  # Math where Chinese equals 57
df[which(df$Class == 2), "English"]  # English of every row in Class 2
# Overwrite a whole row, then a whole column.
df[1, ] <- c(1, 2, 65, 59, 73)
df[, "English"] <- c(23, 45, 67, 87, 34, 46, 87, 95, 43, 76, 23, 94)
df
# To modify values, assign to any of the lookup expressions shown above.
df[which(df$Chinese < 20), "Chinese"] <- 20
df
# rownames() and colnames() return the row and column names of a data frame;
# rownames(x)[i] or colnames(x)[j] gives the name at one position.
colnames(df)
colnames(df)[4]
# Rename one column, then set all column names at once.
colnames(df)[4] <- "math"
colnames(df) <- c("ID", "Class", "Chinese", "Math", "English")
colnames(df)
# To delete rows or columns, select the part of the data frame to keep and
# assign it to the variable; a minus sign in front of a row or column index
# means "everything except this one".
# To keep df intact for the following steps, the selections here are stored in
# another variable. A real deletion would assign the selection back to df.
df.tmp <- df[, c(1, 3, 5)]
df.tmp
df.tmp <- df[-3, ]
df.tmp
# Add a row: data.frame[new_row_index, ] <- row values
df[13, ] <- c(13, 2, 62, 19, 38)
df
# Rows can also be repeated by indexing. These results are stored in df.tmp so
# that df keeps its 13 rows: the 13-element column vectors assigned below
# would fail against a data frame with a different number of rows.
df.tmp <- df[c(1, 1:12), ]
df.tmp
df.tmp <- df[rep(1:12, each = 2), ]
df.tmp
# Add a column: data.frame$new_column_name <- column values
df$Physics <- c(23, 34, 67, 23, 56, 67, 78, 23, 54, 56, 67, 34, 50)
df
# Assigning to the position just past the last column also appends a column.
df[, 7] <- 1:13
df
# mutate() returns a copy with the derived column added; df is not modified.
mutate(df, Chemistry = Chinese + Math + English + Physics)
# Commonly used dplyr functions ----
library(dplyr)
# Rebuild the 12-row score table from the same seed: ID 1..12, Class cycling
# 1, 2, 3 and three score columns drawn uniformly from [0, 100].
# NOTE(review): ?options documents 1...22 as the valid range for `digits`.
# Every column is truncated to integer below, so this setting is presumably
# unnecessary -- confirm before removing it.
options(digits = 0)
set.seed(1)
df <- data.frame(
  ID = 1:12,
  Class = rep(c(1, 2, 3), 4),
  Chinese = runif(12, min = 0, max = 100),
  Math = runif(12, min = 0, max = 100),
  English = runif(12, min = 0, max = 100)
)
# Truncate every column to integer in one step. Assigning into df[] replaces
# the columns while keeping df a data frame.
df[] <- lapply(df, as.integer)
df
# arrange(): sort rows -- ascending by Chinese, by Chinese with ties broken by
# Math, and descending by Chinese.
arrange(df, Chinese)
arrange(df, Chinese, Math)
arrange(df, desc(Chinese))
# Duplicate every row, then remove the duplicates again with distinct().
# seq_len() is used for the row index so an empty df yields no indices.
df1 <- df[rep(seq_len(nrow(df)), each = 2), ]
df1
df1 <- distinct(df1)
df1
# group_by() groups rows; summarise() collapses each group into a summary.
# The two are commonly used together to run an operation per group.
# The pipe operator `%>%` passes the value on its left-hand side to the
# expression on its right-hand side, as that function's first argument.

# Highest Chinese score within each Class.
df %>%
  group_by(Class) %>%
  summarise(max = max(Chinese))
# filter(): keep rows
# Usage: filter(.data, ..., .preserve = FALSE)
# Keeps the rows that satisfy the condition (returned as a data frame).
# On grouped data the condition is evaluated within each group, so max() here
# is the per-Class maximum and the rows holding it are kept.
df %>%
  group_by(Class) %>%
  filter(Chinese == max(Chinese))
# select(): keep only the listed columns.
select(df, ID, Chinese, Math, English)
# A one-row data frame with the columns ID, Class, Chinese, Math and English.
df1 <- data.frame(
  ID = 13,
  Class = 2,
  Chinese = 65,
  Math = 26,
  English = 84
)
df1
# Append that row underneath df; the combined result is printed, not stored.
rbind(df, df1)
library(dplyr)
# Recreate the 12-row score table from the same seed: ID 1..12, Class cycling
# 1, 2, 3 and three score columns drawn uniformly from [0, 100].
# NOTE(review): ?options documents 1...22 as the valid range for `digits`.
# Every column is truncated to integer below, so this setting is presumably
# unnecessary -- confirm before removing it.
options(digits = 0)
set.seed(1)
df <- data.frame(
  ID = 1:12,
  Class = rep(c(1, 2, 3), 4),
  Chinese = runif(12, min = 0, max = 100),
  Math = runif(12, min = 0, max = 100),
  English = runif(12, min = 0, max = 100)
)
# Truncate every column to integer in one step. Assigning into df[] replaces
# the columns while keeping df a data frame.
df[] <- lapply(df, as.integer)
# Commonly used dplyr functions ----
library(dplyr)
# Printing precision and RNG seed for the example table.
options(digits = 0)
set.seed(1)
# Twelve rows: ID 1..12, Class cycling 1, 2, 3, and three score columns drawn
# uniformly from [0, 100] in the order Chinese, Math, English.
df <- data.frame(
  ID = seq_len(12),
  Class = rep(c(1, 2, 3), times = 4),
  Chinese = runif(12, 0, 100),
  Math = runif(12, 0, 100),
  English = runif(12, 0, 100)
)
for (i in 1:ncol(df)) {
df[,i] <- as.integer(df[,i])
df