功能1--筛选出条件内的子集,也就是筛选出满足条件的行
filter()
filter(airquality,Day==1,Temp<=81) #为且
filter(airquality,Day==1 & Temp<=81)#为且
subset()
其实平常还是习惯用subset
subset(airquality,Day==1 & Temp<=81) #为且
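subset(airquality,Day==1 | Temp<=81) #为或,需使用“|”
注意:subset(airquality,Day==1 ,Temp<=81) 并不是“或”——subset的第三个位置参数是select(列选择),该写法实际只按Day==1筛选行并返回全部列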
r自带函数处理也很便利:
airquality[airquality$Day==1 & airquality$Temp<=81,]
功能2--选择指定的列
select()
select(airquality,Ozone,Month,Day) #每个要选择的列名
select(airquality,Ozone:Month) #Ozone:Month之间所有的列
也可用“-”号反向删除一些列,例如 select(airquality,-Ozone) 或 select(airquality,-(Ozone:Wind))
行列筛选结合
airquality%>%filter(Day==1)%>%select(Ozone:Temp)
subset(airquality,Day==1 & Temp==81,select = c(Ozone,Temp))
%>%:连接符,从原数据开始在此基础上连接每一步操作
功能3:添加列/对现有列进行变形
mutate()
mutate(airquality,Add=airquality$Month+airquality$Day) #增加一列命名为Add 数据为month与day的和
mutate(airquality,date=paste(airquality$Month,airquality$Day,sep="-")) #增加一列date将月与日在一列体现
这里提到的paste,也就是下面功能4要介绍的字符串连接函数
功能4:字符串连接
paste()
paste (..., sep = " ", collapse = NULL) #将多个元素连接在一起,sep为连接符,默认为空格;collapse默认为NULL(不合并),若指定为某个字符串,则用它把连接后的所有元素再合并成一个字符串
例:
1: paste(airquality$Ozone,airquality$Solar.R)
结果:
[1] "41 190" "36 118" "12 149" "18 313" "NA NA" "28 NA" "23 299" "19 99" "8 19"
[10] "NA 194" "7 NA" "16 256" "11 290" "14 274" "18 65" "14 334" "34 307" "6 78"
2: paste(airquality$Ozone,airquality$Solar.R,sep="")
[1] "41190" "36118" "12149" "18313" "NANA" "28NA" "23299" "1999" "819" "NA194"
[11] "7NA" "16256" "11290" "14274" "1865" "14334" "34307" "678" "30322" "1144"
3:paste(airquality$Ozone,airquality$Solar.R,sep="",collapse = ";")
[1] "41190;36118;12149;18313;NANA;28NA;23299;1999;819;NA194;7NA;16256;11290;14274;1865;14334;34307;678;30322;1144;18;11320;425;3292;NA66;NA266;NANA;2313;45252;1152......"
同系列另一个函数
paste0()
paste0()相当于sep = ""下的paste()
也就是 paste0("a","b")= paste("a","b",sep="")
功能5:其他对于字符串的操作
strsplit( ) 字符串分割函数
nchar( ) 计算字符串长度
> nchar("abd")
[1] 3
substr( )及substring( ) 字符串截取函数
substr(x, start, stop) #截取从start到stop的内容
substring(x, first, last) #支持last为多个情况
chartr( ) 字符串替换函数
> chartr("abc","123",x="abcdEf")
[1] "123dEf"
toupper( )、tolower( )及casefold( ) 大小写转换函数
> toupper("abcdef") #转换成大写
[1] "ABCDEF"
> tolower("abcdEf") #转换成小写
[1] "abcdef"
> casefold("abcdEf",upper = TRUE) #按条件转换成大写或小写
[1] "ABCDEF"
> casefold("abcdEf",upper = FALSE)
[1] "abcdef"
format()
千位符:format(x,big.mark = ',',sep='\n')
......其他待补充